niobures commited on
Commit
74e8c79
·
verified ·
1 Parent(s): c17ac2b
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Examples/Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for running the GigaAM example scripts on top of NVIDIA NeMo.
FROM nvcr.io/nvidia/nemo:23.10

# The download step below relies on `{a,b,c}` brace expansion, which is a bash
# feature; the default `/bin/sh -c` RUN shell is not guaranteed to provide it.
SHELL ["/bin/bash", "-c"]

RUN mkdir -p /workspace/data
WORKDIR /workspace/data

# Fetch model weights, configs, tokenizer archive and example audio.
RUN wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/{ssl_model_weights.ckpt,emo_model_weights.ckpt,ctc_model_weights.ckpt,rnnt_model_weights.ckpt,ctc_model_config.yaml,emo_model_config.yaml,encoder_config.yaml,rnnt_model_config.yaml,tokenizer_all_sets.tar,example.wav,long_example.wav}
RUN tar -xf tokenizer_all_sets.tar && rm tokenizer_all_sets.tar

RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN pip install Cython
# Quoted so the shell never treats `[all]` as a glob pattern.
RUN pip install "git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
RUN pip install -U soundfile
RUN pip install pyannote.audio==3.2.0
Examples/README.md ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ * [Virtual environment](#virtual-environment)
2
+ * [Docker](#docker)
3
+ * For long-form inference:
4
+ * generate [Hugging Face API token](https://huggingface.co/docs/hub/security-tokens)
5
+ * accept the conditions to access [pyannote/voice-activity-detection](https://huggingface.co/pyannote/voice-activity-detection) files and content
6
+ * accept the conditions to access [pyannote/segmentation](https://huggingface.co/pyannote/segmentation) files and content
7
+
8
+
9
+ ## Virtual environment
10
+ ```bash
11
+ apt install python3-dev
12
+ apt install python3-venv
13
+ apt install ffmpeg libavcodec-extra
14
+ ```
15
+
16
+ ```bash
17
+ python3.10 -m venv venv && . venv/bin/activate
18
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
19
+ pip install Cython
20
+ pip install -U wheel
21
+ pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]
22
+ pip install pyannote.audio==3.2.0
23
+ mkdir ./data
24
+ wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/{ssl_model_weights.ckpt,emo_model_weights.ckpt,ctc_model_weights.ckpt,rnnt_model_weights.ckpt,ctc_model_config.yaml,emo_model_config.yaml,encoder_config.yaml,rnnt_model_config.yaml,tokenizer_all_sets.tar,example.wav,long_example.wav} -P ./data && tar -xf ./data/tokenizer_all_sets.tar --directory ./data/ && rm ./data/tokenizer_all_sets.tar
25
+
26
+ # GigaAM
27
+ python ssl_inference.py --encoder_config ./data/encoder_config.yaml \
28
+ --model_weights ./data/ssl_model_weights.ckpt --device cuda --audio_path ./data/example.wav
29
+
30
+ # encoded signal shape: torch.Size([1, 768, 283])
31
+
32
+ # GigaAM-CTC
33
+ python ctc_inference.py --model_config ./data/ctc_model_config.yaml \
34
+ --model_weights ./data/ctc_model_weights.ckpt --device cuda --audio_path ./data/example.wav
35
+
36
+ # transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
37
+
38
+ # GigaAM-CTC long-form
39
+ python ctc_longform_inference.py --model_config ./data/ctc_model_config.yaml \
40
+ --model_weights ./data/ctc_model_weights.ckpt --device cuda \
41
+ --audio_path ./data/long_example.wav --hf_token <YOUR_HF_TOKEN>
42
+
43
+ # [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам эгумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
44
+ # [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темную живопись икон и возлощенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вашине старинный свод
45
+ # ...
46
+
47
+ # GigaAM-RNNT
48
+ python rnnt_inference.py --model_config ./data/rnnt_model_config.yaml \
49
+ --model_weights ./data/rnnt_model_weights.ckpt --tokenizer_path ./data/tokenizer_all_sets \
50
+ --device cuda --audio_path ./data/example.wav
51
+
52
+ # transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
53
+
54
+ # GigaAM-RNNT long-form
55
+ python rnnt_longform_inference.py --model_config ./data/rnnt_model_config.yaml \
56
+ --model_weights ./data/rnnt_model_weights.ckpt --tokenizer_path ./data/tokenizer_all_sets \
57
+ --device cuda --audio_path ./data/long_example.wav --hf_token <YOUR_HF_TOKEN>
58
+
59
+ # [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам игумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
60
+ # [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темну живопись икон и возлащенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вышине старинный свод
61
+ # ...
62
+
63
+ # GigaAM-Emo
64
+ python emo_inference.py --model_config ./data/emo_model_config.yaml \
65
+ --model_weights ./data/emo_model_weights.ckpt --device cuda --audio_path ./data/example.wav
66
+
67
+ # angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074
68
+ ```
69
+
70
+ ## Docker
71
+
72
+ ```bash
73
+ docker build -t gigaam_image .
74
+
75
+ # GigaAM
76
+ docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
77
+ python /workspace/gigaam/ssl_inference.py --encoder_config /workspace/data/encoder_config.yaml \
78
+ --model_weights /workspace/data/ssl_model_weights.ckpt \
79
+ --device cuda --audio_path /workspace/data/example.wav
80
+
81
+ # encoded signal shape: torch.Size([1, 768, 283])
82
+
83
+ # GigaAM-CTC
84
+ docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
85
+ python /workspace/gigaam/ctc_inference.py --model_config /workspace/data/ctc_model_config.yaml \
86
+ --model_weights /workspace/data/ctc_model_weights.ckpt \
87
+ --device cuda --audio_path /workspace/data/example.wav
88
+
89
+ # transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
90
+
91
+ # GigaAM-CTC long-form
92
+ docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
93
+ python /workspace/gigaam/ctc_longform_inference.py --model_config /workspace/data/ctc_model_config.yaml \
94
+ --model_weights /workspace/data/ctc_model_weights.ckpt --device cuda \
95
+ --audio_path /workspace/data/long_example.wav --hf_token <YOUR_HF_TOKEN>
96
+
97
+ # [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам эгумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
98
+ # [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темную живопись икон и возлощенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вашине старинный свод
99
+ # ...
100
+
101
+ # GigaAM-RNNT
102
+ docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
103
+ python /workspace/gigaam/rnnt_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
104
+ --model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
105
+ --device cuda --audio_path /workspace/data/example.wav
106
+
107
+ # transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
108
+
109
+ # GigaAM-RNNT long-form
110
+ docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
111
+ python /workspace/gigaam/rnnt_longform_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
112
+ --model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
113
+ --device cuda --audio_path /workspace/data/long_example.wav --hf_token <YOUR_HF_TOKEN>
114
+
115
+ # [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам игумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
116
+ # [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темну живопись икон и возлащенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вышине старинный свод
117
+ # ...
118
+
119
+ # GigaAM-Emo
120
+ docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
121
+ python /workspace/gigaam/emo_inference.py --model_config /workspace/data/emo_model_config.yaml \
122
+ --model_weights /workspace/data/emo_model_weights.ckpt \
123
+ --device cuda --audio_path /workspace/data/example.wav
124
+
125
+ # angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074
126
+ ```
Examples/ctc_inference.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ import torch
4
+ import torchaudio
5
+ from nemo.collections.asr.models import EncDecCTCModel
6
+ from nemo.collections.asr.modules.audio_preprocessing import (
7
+ AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
8
+ )
9
+ from nemo.collections.asr.parts.preprocessing.features import (
10
+ FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
11
+ )
12
+
13
+
14
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo torchaudio featurizer whose mel extractor uses a configurable mel scale.

    After the parent initializer runs, ``_mel_spec_extractor`` is replaced by a
    ``torchaudio.transforms.MelSpectrogram`` built with ``mel_scale`` and
    ``wkwargs``.

    Args:
        mel_scale: Mel scale name passed to ``MelSpectrogram`` (default "htk").
        wkwargs: Optional keyword arguments for the window function.
        **kwargs: Forwarded to the parent initializer, except "window_size" and
            "window_stride", which are dropped. Must contain "nfilt", "window",
            "mel_norm" and "n_fft".
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # These two keys are never forwarded to the parent initializer.
        for dropped_key in ("window_size", "window_stride"):
            kwargs.pop(dropped_key, None)

        super().__init__(**kwargs)

        # The attributes read from ``self`` are presumably set by the parent
        # initializer -- confirm against the NeMo version in use.
        extractor_args = dict(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(
            **extractor_args
        )
36
+
37
+
38
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses the local ``FilterbankFeaturesTA``.

    Args:
        mel_scale: Mel scale name forwarded to the featurizer (default "htk").
        **kwargs: Passed unchanged to the parent initializer; must contain
            "features", which is handed to the featurizer as "nfilt".
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)

        # The featurizer calls the filter count "nfilt" rather than "features".
        kwargs["nfilt"] = kwargs.pop("features")

        # Deprecated arguments; kept for config compatibility
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
49
+
50
+
51
+ def _parse_args():
52
+ parser = argparse.ArgumentParser(
53
+ description="Run inference using GigaAM-CTC checkpoint"
54
+ )
55
+ parser.add_argument("--model_config", help="Path to GigaAM-CTC config file (.yaml)")
56
+ parser.add_argument(
57
+ "--model_weights", help="Path to GigaAM-CTC checkpoint file (.ckpt)"
58
+ )
59
+ parser.add_argument("--audio_path", help="Path to audio signal")
60
+ parser.add_argument("--device", help="Device: cpu / cuda")
61
+ return parser.parse_args()
62
+
63
+
64
def main(model_config: str, model_weights: str, device: str, audio_path: str):
    """Transcribe one audio file with a GigaAM-CTC model and print the result.

    Args:
        model_config: Path to the model config file.
        model_weights: Path to the checkpoint loaded into the model.
        device: Device the model is moved to before inference.
        audio_path: Path of the audio file to transcribe.
    """
    asr_model = EncDecCTCModel.from_config_file(model_config)

    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state = torch.load(model_weights, map_location="cpu")
    # strict=False: keys that do not match the model are ignored.
    asr_model.load_state_dict(state, strict=False)
    asr_model = asr_model.to(device)
    asr_model.eval()

    results = asr_model.transcribe([audio_path])
    print(f"transcription: {results[0]}")


if __name__ == "__main__":
    args = _parse_args()
    # The parsed option names match main()'s parameter names one-to-one.
    main(**vars(args))
Examples/ctc_longform_inference.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from io import BytesIO
3
+ from typing import List, Tuple
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torchaudio
8
+ from nemo.collections.asr.models import EncDecCTCModel
9
+ from nemo.collections.asr.modules.audio_preprocessing import (
10
+ AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
11
+ )
12
+ from nemo.collections.asr.parts.preprocessing.features import (
13
+ FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
14
+ )
15
+ from pyannote.audio import Pipeline
16
+ from pydub import AudioSegment
17
+
18
+
19
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """Torchaudio-based NeMo featurizer with a selectable mel scale.

    The parent class is initialized first; its ``_mel_spec_extractor`` is then
    overwritten with a ``torchaudio.transforms.MelSpectrogram`` that honours
    ``mel_scale`` and ``wkwargs``.

    Args:
        mel_scale: Mel scale name for ``MelSpectrogram`` (default "htk").
        wkwargs: Optional keyword arguments for the window function.
        **kwargs: Parent initializer arguments. "window_size" and
            "window_stride" are removed before forwarding; "nfilt", "window",
            "mel_norm" and "n_fft" are required.
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # Strip the keys that must not reach the parent initializer.
        kwargs.pop("window_size", None)
        kwargs.pop("window_stride", None)

        super().__init__(**kwargs)

        # Values taken from ``self`` are presumably populated by the parent
        # initializer -- confirm against the NeMo version in use.
        mel_args = {
            "sample_rate": self._sample_rate,
            "win_length": self.win_length,
            "hop_length": self.hop_length,
            "n_mels": kwargs["nfilt"],
            "window_fn": self.torch_windows[kwargs["window"]],
            "mel_scale": mel_scale,
            "norm": kwargs["mel_norm"],
            "n_fft": kwargs["n_fft"],
            "f_max": kwargs.get("highfreq", None),
            "f_min": kwargs.get("lowfreq", 0),
            "wkwargs": wkwargs,
        }
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(**mel_args)
41
+
42
+
43
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo preprocessor whose featurizer is the local ``FilterbankFeaturesTA``.

    Args:
        mel_scale: Mel scale name handed to the featurizer (default "htk").
        **kwargs: Parent initializer arguments; "features" is required and is
            renamed to "nfilt" for the featurizer.
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)

        # Rename "features" to the "nfilt" key the featurizer expects.
        filter_count = kwargs.pop("features")
        kwargs["nfilt"] = filter_count

        # Deprecated arguments; kept for config compatibility
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
54
+
55
+
56
def audiosegment_to_numpy(audiosegment: AudioSegment) -> np.ndarray:
    """Convert an AudioSegment to a float32 numpy array scaled by its bit depth.

    Args:
        audiosegment: Object exposing ``get_array_of_samples()``, ``channels``
            and ``sample_width`` (bytes per sample).

    Returns:
        A 1-D array for a single channel, otherwise an array of shape
        ``(frames, channels)``. Integer samples are divided by the magnitude of
        the most negative value for the sample width (32768 for 16-bit audio).
    """
    samples = np.array(audiosegment.get_array_of_samples())

    # Samples are interleaved per frame, so any multi-channel layout reshapes
    # the same way (the previous code only handled exactly two channels).
    channel_count = audiosegment.channels
    if channel_count > 1:
        samples = samples.reshape((-1, channel_count))

    # Derive the scale from the sample width instead of assuming 16-bit audio.
    full_scale = float(1 << (8 * audiosegment.sample_width - 1))
    return samples.astype(np.float32, order="C") / full_scale
64
+
65
+
66
def format_time(seconds: float) -> str:
    """Format a duration in seconds as ``MM:SS:CC`` or ``HH:MM:SS:CC``.

    ``CC`` is the truncated hundredths of a second. The hours field is only
    included when it is greater than zero.
    """
    whole_hours, within_hour = divmod(seconds, 3600)
    hours = int(whole_hours)
    minutes = int(within_hour // 60)

    within_minute = seconds % 60
    whole_seconds = int(within_minute)
    # Truncated, not rounded.
    hundredths = int((within_minute - whole_seconds) * 100)

    fields = [minutes, whole_seconds, hundredths]
    if hours > 0:
        fields.insert(0, hours)
    return ":".join(f"{field:02}" for field in fields)
77
+
78
+
79
def segment_audio(
    audio_path: str,
    pipeline: Pipeline,
    max_duration: float = 22.0,
    min_duration: float = 15.0,
    new_chunk_threshold: float = 0.2,
) -> Tuple[List[np.ndarray], List[List[float]]]:
    """Split a WAV file into speech chunks for ASR using a VAD pipeline.

    Speech regions returned by ``pipeline`` are merged greedily. A chunk is
    closed when it is longer than ``min_duration`` and the gap to the next
    region exceeds ``new_chunk_threshold``, or when adding the next region
    would push it past ``max_duration``.

    Args:
        audio_path: Path to a WAV file.
        pipeline: Callable taking ``{"uri": ..., "audio": ...}`` and returning
            an object with ``get_timeline().support()``.
        max_duration: Upper bound, in seconds, used when merging regions.
        min_duration: Minimum chunk length, in seconds, before a gap may close it.
        new_chunk_threshold: Gap, in seconds, that closes a long-enough chunk.

    Returns:
        ``(segments, boundaries)``: the audio of each chunk as a numpy array and
        the matching ``[start, end]`` pairs in seconds. Empty chunks are never
        emitted.
    """
    # Prepare audio for pyannote vad pipeline
    audio = AudioSegment.from_wav(audio_path)
    audio_bytes = BytesIO()
    audio.export(audio_bytes, format="wav")
    audio_bytes.seek(0)

    # Process audio with pipeline to obtain segments with speech activity
    sad_segments = pipeline({"uri": "filename", "audio": audio_bytes})

    # len(audio) is in milliseconds; computed once instead of per region.
    total_seconds = len(audio) / 1000

    segments = []
    boundaries = []

    def flush(chunk_start, chunk_end):
        # Skip zero-length chunks. Previously a first speech region ending
        # after max_duration produced an empty [0, 0] chunk.
        if chunk_end <= chunk_start:
            return
        segments.append(
            audiosegment_to_numpy(audio[chunk_start * 1000 : chunk_end * 1000])
        )
        boundaries.append([chunk_start, chunk_end])

    curr_duration = 0
    curr_start = 0
    curr_end = 0

    # Concat segments from pipeline into chunks for asr according to max/min duration
    for segment in sad_segments.get_timeline().support():
        start = max(0, segment.start)
        end = min(total_seconds, segment.end)
        gap_closes_chunk = (
            curr_duration > min_duration and start - curr_end > new_chunk_threshold
        )
        would_exceed_max = curr_duration + (end - curr_end) > max_duration
        if gap_closes_chunk or would_exceed_max:
            flush(curr_start, curr_end)
            curr_start = start

        curr_end = end
        curr_duration = curr_end - curr_start

    flush(curr_start, curr_end)

    return segments, boundaries
126
+
127
+
128
+ def _parse_args():
129
+ parser = argparse.ArgumentParser(
130
+ description="Run long-form inference using GigaAM-CTC checkpoint"
131
+ )
132
+ parser.add_argument("--model_config", help="Path to GigaAM-CTC config file (.yaml)")
133
+ parser.add_argument(
134
+ "--model_weights", help="Path to GigaAM-CTC checkpoint file (.ckpt)"
135
+ )
136
+ parser.add_argument("--audio_path", help="Path to audio signal")
137
+ parser.add_argument(
138
+ "--hf_token", help="HuggingFace token for using pyannote Pipeline"
139
+ )
140
+ parser.add_argument("--device", help="Device: cpu / cuda")
141
+ parser.add_argument("--fp16", help="Run in FP16 mode", default=True)
142
+ parser.add_argument(
143
+ "--batch_size", help="Batch size for acoustic model inference", default=10
144
+ )
145
+ return parser.parse_args()
146
+
147
+
148
def main(
    model_config: str,
    model_weights: str,
    device: str,
    audio_path: str,
    hf_token: str,
    fp16: bool,
    batch_size: int = 10,
):
    """Run long-form GigaAM-CTC transcription and print one line per chunk.

    Args:
        model_config: Path to the model config file.
        model_weights: Path to the checkpoint loaded into the model.
        device: Device for the ASR model and the VAD pipeline.
        audio_path: Path of the audio file to transcribe.
        hf_token: Token passed to ``Pipeline.from_pretrained``.
        fp16: When truthy and ``device`` is not "cpu", run in half precision.
        batch_size: Batch size passed to ``model.transcribe``.
    """
    # Initialize model
    asr_model = EncDecCTCModel.from_config_file(model_config)

    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state = torch.load(model_weights, map_location="cpu")
    asr_model.load_state_dict(state, strict=False)
    asr_model = asr_model.to(device)

    use_half = device != "cpu" and fp16
    if use_half:
        asr_model = asr_model.half()
        # The preprocessor is kept in float32 even when the model is halved.
        asr_model.preprocessor = asr_model.preprocessor.float()
    asr_model.eval()

    # Initialize pyannote pipeline
    vad_pipeline = Pipeline.from_pretrained(
        "pyannote/voice-activity-detection", use_auth_token=hf_token
    ).to(torch.device(device))

    # Segment audio
    chunks, spans = segment_audio(audio_path, vad_pipeline)

    # Transcribe segments
    if use_half:
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            texts = asr_model.transcribe(chunks, batch_size=batch_size)
    else:
        texts = asr_model.transcribe(chunks, batch_size=batch_size)

    for text, span in zip(texts, spans):
        print(f"[{format_time(span[0])} - {format_time(span[1])}]: {text}\n")


if __name__ == "__main__":
    args = _parse_args()
    # The parsed option names match main()'s parameter names one-to-one.
    main(**vars(args))
Examples/emo_inference.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from typing import List, Union
3
+
4
+ import hydra
5
+ import soundfile
6
+ import torch
7
+ from omegaconf import DictConfig, ListConfig, OmegaConf
8
+
9
+
10
class SpecScaler(torch.nn.Module):
    """Natural log of the input after clamping it to ``[1e-9, 1e9]``.

    The clamp is applied in place, so the caller's tensor is modified.
    """

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x.clamp_(min=1e-9, max=1e9)
        return x.log()
13
+
14
+
15
class GigaAMEmo(torch.nn.Module):
    """Emotion classifier: feature extractor, conformer encoder and linear head.

    All three sub-modules are instantiated with hydra from ``conf``.
    ``id2name`` is taken from ``conf.id2name`` as is.
    """

    def __init__(self, conf: Union[DictConfig, ListConfig]):
        super().__init__()
        self.id2name = conf.id2name
        self.feature_extractor = hydra.utils.instantiate(conf.feature_extractor)
        self.conformer = hydra.utils.instantiate(conf.encoder)
        self.linear_head = hydra.utils.instantiate(conf.classification_head)

    @property
    def device(self):
        """Device of the first model parameter."""
        return next(self.parameters()).device

    def forward(self, features, features_length=None):
        """Compute class logits from a feature tensor.

        Args:
            features: 2-D or 3-D tensor; a 2-D input gets a leading batch
                dimension added.
            features_length: Optional per-item lengths passed to the encoder.
                When ``None``, every item is assumed to span the whole last
                dimension of ``features``.

        Returns:
            The output of ``linear_head`` applied to the encoder output
            averaged over its last dimension.
        """
        if features.dim() == 2:
            features = features.unsqueeze(0)
        # ``is None`` rather than truthiness: ``not tensor`` raises for tensors
        # holding more than one element, which broke explicit batched lengths.
        if features_length is None:
            features_length = (
                torch.ones(features.shape[0], device=self.device)
                * features.shape[-1]
            )
        encoded, _ = self.conformer(audio_signal=features, length=features_length)
        # Average over the entire last dimension of the encoder output.
        encoded_pooled = torch.nn.functional.avg_pool1d(
            encoded, kernel_size=encoded.shape[-1]
        ).squeeze(-1)

        logits = self.linear_head(encoded_pooled)
        return logits

    def get_probs(self, audio_path: str) -> List[List[float]]:
        """Read an audio file and return softmax probabilities per class.

        The sample rate returned by ``soundfile.read`` is ignored.
        NOTE(review): the signal is presumably expected to be mono and already
        at the model's sample rate -- confirm against the config.
        """
        audio_signal, _ = soundfile.read(audio_path, dtype="float32")
        audio_tensor = torch.tensor(audio_signal).float().to(self.device)
        features = self.feature_extractor(audio_tensor)
        logits = self.forward(features)
        probs = torch.nn.functional.softmax(logits, dim=1).detach().tolist()
        return probs
47
+
48
+
49
+ def _parse_args():
50
+ parser = argparse.ArgumentParser(
51
+ description="Run inference using GigaAM-Emo checkpoint"
52
+ )
53
+ parser.add_argument("--model_config", help="Path to GigaAM-Emo config file (.yaml)")
54
+ parser.add_argument(
55
+ "--model_weights", help="Path to GigaAM-Emo checkpoint file (.ckpt)"
56
+ )
57
+ parser.add_argument("--audio_path", help="Path to audio signal")
58
+ parser.add_argument("--device", help="Device: cpu / cuda")
59
+ return parser.parse_args()
60
+
61
+
62
def main(model_config: str, model_weights: str, device: str, audio_path: str):
    """Classify the emotion of one audio file and print per-class probabilities.

    Args:
        model_config: Path to the config file loaded with OmegaConf.
        model_weights: Path to the checkpoint loaded into the model.
        device: Device the model is moved to before inference.
        audio_path: Path of the audio file to classify.
    """
    emo_model = GigaAMEmo(OmegaConf.load(model_config))

    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    weights = torch.load(model_weights, map_location="cpu")
    emo_model.load_state_dict(weights, strict=False)
    emo_model = emo_model.to(device)
    emo_model.eval()

    with torch.no_grad():
        probs = emo_model.get_probs(audio_path)[0]

    report = [f"{emo_model.id2name[i]}: {p:.3f}" for i, p in enumerate(probs)]
    print(", ".join(report))


if __name__ == "__main__":
    args = _parse_args()
    # The parsed option names match main()'s parameter names one-to-one.
    main(**vars(args))
Examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb ADDED
@@ -0,0 +1,955 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "oREzT-effoFr"
7
+ },
8
+ "source": [
9
+ "### Installing and importing dependencies"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": null,
15
+ "metadata": {
16
+ "colab": {
17
+ "base_uri": "https://localhost:8080/"
18
+ },
19
+ "id": "yxU1SOPfWxab",
20
+ "outputId": "e9b2c73a-d3d4-4ba9-8ce1-3527c95c4d3f"
21
+ },
22
+ "outputs": [
23
+ {
24
+ "name": "stdout",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "Collecting wget\n",
28
+ " Downloading wget-3.2.zip (10 kB)\n",
29
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
30
+ "Building wheels for collected packages: wget\n",
31
+ " Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
32
+ " Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=2e82f0e3a185ee764cf0a1eef86b3f525139a342d3630e878d05860de80d6dee\n",
33
+ " Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n",
34
+ "Successfully built wget\n",
35
+ "Installing collected packages: wget\n",
36
+ "Successfully installed wget-3.2\n",
37
+ "Reading package lists... Done\n",
38
+ "Building dependency tree... Done\n",
39
+ "Reading state information... Done\n",
40
+ "libsndfile1 is already the newest version (1.0.31-2ubuntu0.1).\n",
41
+ "ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n",
42
+ "The following additional packages will be installed:\n",
43
+ " libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1\n",
44
+ "Suggested packages:\n",
45
+ " libsox-fmt-all\n",
46
+ "The following NEW packages will be installed:\n",
47
+ " libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1 sox\n",
48
+ "0 upgraded, 7 newly installed, 0 to remove and 45 not upgraded.\n",
49
+ "Need to get 617 kB of archives.\n",
50
+ "After this operation, 1,764 kB of additional disk space will be used.\n",
51
+ "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n",
52
+ "Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n",
53
+ "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [240 kB]\n",
54
+ "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [11.2 kB]\n",
55
+ "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n",
56
+ "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [33.7 kB]\n",
57
+ "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [104 kB]\n",
58
+ "Fetched 617 kB in 0s (2,444 kB/s)\n",
59
+ "Selecting previously unselected package libopencore-amrnb0:amd64.\n",
60
+ "(Reading database ... 121918 files and directories currently installed.)\n",
61
+ "Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n",
62
+ "Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
63
+ "Selecting previously unselected package libopencore-amrwb0:amd64.\n",
64
+ "Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n",
65
+ "Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
66
+ "Selecting previously unselected package libsox3:amd64.\n",
67
+ "Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
68
+ "Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
69
+ "Selecting previously unselected package libsox-fmt-alsa:amd64.\n",
70
+ "Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
71
+ "Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
72
+ "Selecting previously unselected package libwavpack1:amd64.\n",
73
+ "Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n",
74
+ "Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n",
75
+ "Selecting previously unselected package libsox-fmt-base:amd64.\n",
76
+ "Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
77
+ "Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
78
+ "Selecting previously unselected package sox.\n",
79
+ "Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
80
+ "Unpacking sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
81
+ "Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
82
+ "Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
83
+ "Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
84
+ "Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n",
85
+ "Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
86
+ "Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
87
+ "Setting up sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
88
+ "Processing triggers for man-db (2.10.2-1) ...\n",
89
+ "Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n",
90
+ "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
91
+ "\n",
92
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
93
+ "\n",
94
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
95
+ "\n",
96
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
97
+ "\n",
98
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
99
+ "\n",
100
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
101
+ "\n",
102
+ "\u001b[33mDEPRECATION: git+https://github.com/NVIDIA/NeMo.git#egg=nemo_toolkit[all] contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617\u001b[0m\u001b[33m\n",
103
+ "\u001b[0mCollecting nemo_toolkit[all]\n",
104
+ " Cloning https://github.com/NVIDIA/NeMo.git to /tmp/pip-install-unbwo6dj/nemo-toolkit_de6e0e6e28ce411cafb3187496bb4905\n",
105
+ " Running command git clone --filter=blob:none --quiet https://github.com/NVIDIA/NeMo.git /tmp/pip-install-unbwo6dj/nemo-toolkit_de6e0e6e28ce411cafb3187496bb4905\n",
106
+ " Resolved https://github.com/NVIDIA/NeMo.git to commit 1fa961ba03ab5f8c91b278640e29807079373372\n",
107
+ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
108
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
109
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
110
+ "Collecting fiddle (from nemo_toolkit[all])\n",
111
+ " Downloading fiddle-0.3.0-py3-none-any.whl (419 kB)\n",
112
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m419.8/419.8 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
113
+ "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.23.1)\n",
114
+ "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.58.1)\n",
115
+ "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.25.2)\n",
116
+ "Collecting onnx>=1.7.0 (from nemo_toolkit[all])\n",
117
+ " Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
118
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m34.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
119
+ "\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.8.2)\n",
120
+ "Collecting ruamel.yaml (from nemo_toolkit[all])\n",
121
+ " Downloading ruamel.yaml-0.18.6-py3-none-any.whl (117 kB)\n",
122
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.8/117.8 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
123
+ "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.2.2)\n",
124
+ "Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (67.7.2)\n",
125
+ "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.15.2)\n",
126
+ "Requirement already satisfied: text-unidecode in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.3)\n",
127
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.3.0+cu121)\n",
128
+ "Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (4.66.4)\n",
129
+ "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.2)\n",
130
+ "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.14.1)\n",
131
+ "Collecting black~=24.3 (from nemo_toolkit[all])\n",
132
+ " Downloading black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n",
133
+ "\u001b[2K \u001b[90m━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
134
+ "\u001b[?25hCollecting click==8.0.2 (from nemo_toolkit[all])\n",
135
+ " Downloading click-8.0.2-py3-none-any.whl (97 kB)\n",
136
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
137
+ "\u001b[?25hCollecting isort<6.0.0,>5.1.0 (from nemo_toolkit[all])\n",
138
+ " Downloading isort-5.13.2-py3-none-any.whl (92 kB)\n",
139
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.3/92.3 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
140
+ "\u001b[?25hCollecting parameterized (from nemo_toolkit[all])\n",
141
+ " Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)\n",
142
+ "Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.4.4)\n",
143
+ "Collecting pytest-mock (from nemo_toolkit[all])\n",
144
+ " Downloading pytest_mock-3.14.0-py3-none-any.whl (9.9 kB)\n",
145
+ "Collecting pytest-runner (from nemo_toolkit[all])\n",
146
+ " Downloading pytest_runner-6.0.1-py3-none-any.whl (7.2 kB)\n",
147
+ "Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.0.2)\n",
148
+ "Collecting sphinxcontrib-bibtex (from nemo_toolkit[all])\n",
149
+ " Downloading sphinxcontrib_bibtex-2.6.2-py3-none-any.whl (40 kB)\n",
150
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
151
+ "\u001b[?25hCollecting wandb (from nemo_toolkit[all])\n",
152
+ " Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)\n",
153
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
154
+ "\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.2.1)\n",
155
+ "Collecting hydra-core<=1.3.2,>1.3 (from nemo_toolkit[all])\n",
156
+ " Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n",
157
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
158
+ "\u001b[?25hCollecting omegaconf<=2.3 (from nemo_toolkit[all])\n",
159
+ " Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
160
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
161
+ "\u001b[?25hCollecting pytorch-lightning>=2.2.1 (from nemo_toolkit[all])\n",
162
+ " Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)\n",
163
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m802.3/802.3 kB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
164
+ "\u001b[?25hCollecting torchmetrics>=0.11.0 (from nemo_toolkit[all])\n",
165
+ " Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)\n",
166
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m868.8/868.8 kB\u001b[0m \u001b[31m58.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
167
+ "\u001b[?25hCollecting transformers<=4.40.2,>=4.36.0 (from nemo_toolkit[all])\n",
168
+ " Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)\n",
169
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m76.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
170
+ "\u001b[?25hCollecting webdataset>=0.2.86 (from nemo_toolkit[all])\n",
171
+ " Downloading webdataset-0.2.86-py3-none-any.whl (70 kB)\n",
172
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.4/70.4 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
173
+ "\u001b[?25hCollecting datasets (from nemo_toolkit[all])\n",
174
+ " Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n",
175
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m46.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
176
+ "\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.0.0)\n",
177
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.0.3)\n",
178
+ "Collecting sacremoses>=0.0.43 (from nemo_toolkit[all])\n",
179
+ " Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)\n",
180
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.5/897.5 kB\u001b[0m \u001b[31m58.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
181
+ "\u001b[?25hRequirement already satisfied: sentencepiece<1.0.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.99)\n",
182
+ "Collecting braceexpand (from nemo_toolkit[all])\n",
183
+ " Downloading braceexpand-0.1.7-py2.py3-none-any.whl (5.9 kB)\n",
184
+ "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.6.2)\n",
185
+ "Collecting einops (from nemo_toolkit[all])\n",
186
+ " Downloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
187
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
188
+ "\u001b[?25hCollecting g2p-en (from nemo_toolkit[all])\n",
189
+ " Downloading g2p_en-2.1.0-py3-none-any.whl (3.1 MB)\n",
190
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m78.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
191
+ "\u001b[?25hRequirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.7.1)\n",
192
+ "Collecting jiwer (from nemo_toolkit[all])\n",
193
+ " Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)\n",
194
+ "Collecting kaldi-python-io (from nemo_toolkit[all])\n",
195
+ " Downloading kaldi-python-io-1.2.2.tar.gz (8.8 kB)\n",
196
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
197
+ "Collecting kaldiio (from nemo_toolkit[all])\n",
198
+ " Downloading kaldiio-2.18.0-py3-none-any.whl (28 kB)\n",
199
+ "Collecting lhotse>=1.22.0 (from nemo_toolkit[all])\n",
200
+ " Downloading lhotse-1.23.0-py3-none-any.whl (772 kB)\n",
201
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m772.4/772.4 kB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
202
+ "\u001b[?25hRequirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.10.2.post1)\n",
203
+ "Collecting marshmallow (from nemo_toolkit[all])\n",
204
+ " Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
205
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
206
+ "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.7.1)\n",
207
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (24.0)\n",
208
+ "Collecting pyannote.core (from nemo_toolkit[all])\n",
209
+ " Downloading pyannote.core-5.0.0-py3-none-any.whl (58 kB)\n",
210
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
211
+ "\u001b[?25hCollecting pyannote.metrics (from nemo_toolkit[all])\n",
212
+ " Downloading pyannote.metrics-3.2.1-py3-none-any.whl (51 kB)\n",
213
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.4/51.4 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
214
+ "\u001b[?25hCollecting pydub (from nemo_toolkit[all])\n",
215
+ " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
216
+ "Collecting pyloudnorm (from nemo_toolkit[all])\n",
217
+ " Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n",
218
+ "Collecting resampy (from nemo_toolkit[all])\n",
219
+ " Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)\n",
220
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m82.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
221
+ "\u001b[?25hRequirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.11.4)\n",
222
+ "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.12.1)\n",
223
+ "Collecting sox (from nemo_toolkit[all])\n",
224
+ " Downloading sox-1.5.0.tar.gz (63 kB)\n",
225
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
226
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
227
+ "Collecting texterrors (from nemo_toolkit[all])\n",
228
+ " Downloading texterrors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
229
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m64.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
230
+ "\u001b[?25hCollecting accelerated-scan (from nemo_toolkit[all])\n",
231
+ " Downloading accelerated_scan-0.2.0-py3-none-any.whl (11 kB)\n",
232
+ "Collecting boto3 (from nemo_toolkit[all])\n",
233
+ " Downloading boto3-1.34.113-py3-none-any.whl (139 kB)\n",
234
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
235
+ "\u001b[?25hCollecting causal-conv1d>=1.2.0 (from nemo_toolkit[all])\n",
236
+ " Downloading causal_conv1d-1.2.2.post1.tar.gz (7.2 kB)\n",
237
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
238
+ "Collecting faiss-cpu (from nemo_toolkit[all])\n",
239
+ " Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
240
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
241
+ "\u001b[?25hCollecting fasttext (from nemo_toolkit[all])\n",
242
+ " Downloading fasttext-0.9.2.tar.gz (68 kB)\n",
243
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.8/68.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
244
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
245
+ "Collecting flask-restful (from nemo_toolkit[all])\n",
246
+ " Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl (26 kB)\n",
247
+ "Collecting ftfy (from nemo_toolkit[all])\n",
248
+ " Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)\n",
249
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.4/54.4 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
250
+ "\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.1.0)\n",
251
+ "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.9.0)\n",
252
+ "Collecting ijson (from nemo_toolkit[all])\n",
253
+ " Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n",
254
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
255
+ "\u001b[?25hRequirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.42.1)\n",
256
+ "Collecting markdown2 (from nemo_toolkit[all])\n",
257
+ " Downloading markdown2-2.4.13-py2.py3-none-any.whl (41 kB)\n",
258
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
259
+ "\u001b[?25hRequirement already satisfied: nltk>=3.6.5 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.8.1)\n",
260
+ "Collecting opencc<1.1.7 (from nemo_toolkit[all])\n",
261
+ " Downloading OpenCC-1.1.6-cp310-cp310-manylinux1_x86_64.whl (778 kB)\n",
262
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m778.3/778.3 kB\u001b[0m \u001b[31m54.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
263
+ "\u001b[?25hCollecting pangu (from nemo_toolkit[all])\n",
264
+ " Downloading pangu-4.0.6.1-py3-none-any.whl (6.4 kB)\n",
265
+ "Collecting rapidfuzz (from nemo_toolkit[all])\n",
266
+ " Downloading rapidfuzz-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
267
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m83.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
268
+ "\u001b[?25hCollecting rouge-score (from nemo_toolkit[all])\n",
269
+ " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
270
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
271
+ "Collecting sacrebleu (from nemo_toolkit[all])\n",
272
+ " Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)\n",
273
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.7/106.7 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
274
+ "\u001b[?25hCollecting sentence-transformers (from nemo_toolkit[all])\n",
275
+ " Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n",
276
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
277
+ "\u001b[?25hRequirement already satisfied: tensorstore<0.1.46 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.45)\n",
278
+ "Collecting zarr (from nemo_toolkit[all])\n",
279
+ " Downloading zarr-2.18.2-py3-none-any.whl (210 kB)\n",
280
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.2/210.2 kB\u001b[0m \u001b[31m22.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
281
+ "\u001b[?25hCollecting attrdict (from nemo_toolkit[all])\n",
282
+ " Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
283
+ "Collecting kornia (from nemo_toolkit[all])\n",
284
+ " Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)\n",
285
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m825.4/825.4 kB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
286
+ "\u001b[?25hCollecting pypinyin (from nemo_toolkit[all])\n",
287
+ " Downloading pypinyin-0.51.0-py2.py3-none-any.whl (1.4 MB)\n",
288
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
289
+ "\u001b[?25hCollecting pypinyin-dict (from nemo_toolkit[all])\n",
290
+ " Downloading pypinyin_dict-0.8.0-py2.py3-none-any.whl (9.5 MB)\n",
291
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m88.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
292
+ "\u001b[?25hCollecting progress>=1.5 (from nemo_toolkit[all])\n",
293
+ " Downloading progress-1.6.tar.gz (7.8 kB)\n",
294
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
295
+ "Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.9.0)\n",
296
+ "Collecting textdistance>=4.1.5 (from nemo_toolkit[all])\n",
297
+ " Downloading textdistance-4.6.2-py3-none-any.whl (31 kB)\n",
298
+ "Collecting addict (from nemo_toolkit[all])\n",
299
+ " Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
300
+ "Collecting clip (from nemo_toolkit[all])\n",
301
+ " Downloading clip-0.2.0.tar.gz (5.5 kB)\n",
302
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
303
+ "Collecting decord (from nemo_toolkit[all])\n",
304
+ " Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
305
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m74.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
306
+ "\u001b[?25hCollecting diffusers>=0.19.3 (from nemo_toolkit[all])\n",
307
+ " Downloading diffusers-0.28.0-py3-none-any.whl (2.2 MB)\n",
308
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m77.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
309
+ "\u001b[?25hCollecting einops-exts (from nemo_toolkit[all])\n",
310
+ " Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
311
+ "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.31.6)\n",
312
+ "Collecting nerfacc>=0.5.3 (from nemo_toolkit[all])\n",
313
+ " Downloading nerfacc-0.5.3-py3-none-any.whl (54 kB)\n",
314
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
315
+ "\u001b[?25hCollecting open-clip-torch (from nemo_toolkit[all])\n",
316
+ " Downloading open_clip_torch-2.24.0-py3-none-any.whl (1.5 MB)\n",
317
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m74.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
318
+ "\u001b[?25hCollecting PyMCubes (from nemo_toolkit[all])\n",
319
+ " Downloading PyMCubes-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB)\n",
320
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.3/274.3 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
321
+ "\u001b[?25hCollecting taming-transformers (from nemo_toolkit[all])\n",
322
+ " Downloading taming_transformers-0.0.1-py3-none-any.whl (45 kB)\n",
323
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.6/45.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
324
+ "\u001b[?25hCollecting torchdiffeq (from nemo_toolkit[all])\n",
325
+ " Downloading torchdiffeq-0.2.3-py3-none-any.whl (31 kB)\n",
326
+ "Collecting torchsde (from nemo_toolkit[all])\n",
327
+ " Downloading torchsde-0.2.6-py3-none-any.whl (61 kB)\n",
328
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
329
+ "\u001b[?25hCollecting trimesh (from nemo_toolkit[all])\n",
330
+ " Downloading trimesh-4.4.0-py3-none-any.whl (694 kB)\n",
331
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m694.6/694.6 kB\u001b[0m \u001b[31m52.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
332
+ "\u001b[?25hCollecting nemo-text-processing (from nemo_toolkit[all])\n",
333
+ " Downloading nemo_text_processing-1.0.2-py3-none-any.whl (2.6 MB)\n",
334
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m79.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
335
+ "\u001b[?25hCollecting mypy-extensions>=0.4.3 (from black~=24.3->nemo_toolkit[all])\n",
336
+ " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
337
+ "Collecting pathspec>=0.9.0 (from black~=24.3->nemo_toolkit[all])\n",
338
+ " Downloading pathspec-0.12.1-py3-none-any.whl (31 kB)\n",
339
+ "Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.2.2)\n",
340
+ "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (2.0.1)\n",
341
+ "Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.11.0)\n",
342
+ "Collecting ninja (from causal-conv1d>=1.2.0->nemo_toolkit[all])\n",
343
+ " Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
344
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
345
+ "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (7.1.0)\n",
346
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (3.14.0)\n",
347
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2023.12.25)\n",
348
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2.31.0)\n",
349
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (0.4.3)\n",
350
+ "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (9.4.0)\n",
351
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (2023.6.0)\n",
352
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (6.0.1)\n",
353
+ "Collecting antlr4-python3-runtime==4.9.* (from hydra-core<=1.3.2,>1.3->nemo_toolkit[all])\n",
354
+ " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
355
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
356
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
357
+ "INFO: pip is looking at multiple versions of jiwer to determine which version is compatible with other requirements. This could take a while.\n",
358
+ "Collecting jiwer (from nemo_toolkit[all])\n",
359
+ " Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)\n",
360
+ " Downloading jiwer-3.0.2-py3-none-any.whl (21 kB)\n",
361
+ " Downloading jiwer-3.0.1-py3-none-any.whl (21 kB)\n",
362
+ " Downloading jiwer-3.0.0-py3-none-any.whl (21 kB)\n",
363
+ " Downloading jiwer-2.6.0-py3-none-any.whl (20 kB)\n",
364
+ " Downloading jiwer-2.5.2-py3-none-any.whl (15 kB)\n",
365
+ "Collecting rapidfuzz (from nemo_toolkit[all])\n",
366
+ " Downloading rapidfuzz-2.13.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
367
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m62.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
368
+ "\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from lhotse>=1.22.0->nemo_toolkit[all]) (3.0.1)\n",
369
+ "Collecting cytoolz>=0.10.1 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
370
+ " Downloading cytoolz-0.12.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
371
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
372
+ "\u001b[?25hCollecting intervaltree>=3.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
373
+ " Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
374
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
375
+ "Collecting lilcom>=1.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
376
+ " Downloading lilcom-1.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB)\n",
377
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.1/87.1 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
378
+ "\u001b[?25hRequirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.4.2)\n",
379
+ "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (4.4.2)\n",
380
+ "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.8.1)\n",
381
+ "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.3.7)\n",
382
+ "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.4)\n",
383
+ "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.0.8)\n",
384
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.2.1)\n",
385
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (0.12.1)\n",
386
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (4.51.0)\n",
387
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.4.5)\n",
388
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (3.1.2)\n",
389
+ "Requirement already satisfied: rich>=12 in /usr/local/lib/python3.10/dist-packages (from nerfacc>=0.5.3->nemo_toolkit[all]) (13.7.1)\n",
390
+ "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->nemo_toolkit[all]) (0.41.1)\n",
391
+ "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx>=1.7.0->nemo_toolkit[all]) (3.20.3)\n",
392
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->nemo_toolkit[all]) (1.16.0)\n",
393
+ "Collecting lightning-utilities>=0.8.0 (from pytorch-lightning>=2.2.1->nemo_toolkit[all])\n",
394
+ " Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)\n",
395
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->nemo_toolkit[all]) (3.5.0)\n",
396
+ "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->nemo_toolkit[all]) (1.16.0)\n",
397
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (1.12)\n",
398
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.3)\n",
399
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.1.4)\n",
400
+ "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
401
+ " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
402
+ "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
403
+ " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
404
+ "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
405
+ " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
406
+ "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->nemo_toolkit[all])\n",
407
+ " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
408
+ "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->nemo_toolkit[all])\n",
409
+ " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
410
+ "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->nemo_toolkit[all])\n",
411
+ " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
412
+ "Collecting nvidia-curand-cu12==10.3.2.106 (from torch->nemo_toolkit[all])\n",
413
+ " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
414
+ "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->nemo_toolkit[all])\n",
415
+ " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
416
+ "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->nemo_toolkit[all])\n",
417
+ " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
418
+ "Collecting nvidia-nccl-cu12==2.20.5 (from torch->nemo_toolkit[all])\n",
419
+ " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
420
+ "Collecting nvidia-nvtx-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
421
+ " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
422
+ "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (2.3.0)\n",
423
+ "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->nemo_toolkit[all])\n",
424
+ " Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
425
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
426
+ "\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<=4.40.2,>=4.36.0->nemo_toolkit[all]) (0.19.1)\n",
427
+ "Collecting botocore<1.35.0,>=1.34.113 (from boto3->nemo_toolkit[all])\n",
428
+ " Downloading botocore-1.34.113-py3-none-any.whl (12.3 MB)\n",
429
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m78.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
430
+ "\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->nemo_toolkit[all])\n",
431
+ " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
432
+ "Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->nemo_toolkit[all])\n",
433
+ " Downloading s3transfer-0.10.1-py3-none-any.whl (82 kB)\n",
434
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.2/82.2 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
435
+ "\u001b[?25hRequirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (14.0.2)\n",
436
+ "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (0.6)\n",
437
+ "Collecting dill<0.3.9,>=0.3.0 (from datasets->nemo_toolkit[all])\n",
438
+ " Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
439
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
440
+ "\u001b[?25hCollecting xxhash (from datasets->nemo_toolkit[all])\n",
441
+ " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
442
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
443
+ "\u001b[?25hCollecting multiprocess (from datasets->nemo_toolkit[all])\n",
444
+ " Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
445
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
446
+ "\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (3.9.5)\n",
447
+ "Collecting pybind11>=2.2 (from fasttext->nemo_toolkit[all])\n",
448
+ " Using cached pybind11-2.12.0-py3-none-any.whl (234 kB)\n",
449
+ "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (1.4.0)\n",
450
+ "Requirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (0.20.3)\n",
451
+ "Collecting libcst (from fiddle->nemo_toolkit[all])\n",
452
+ " Downloading libcst-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
453
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m65.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
454
+ "\u001b[?25hCollecting aniso8601>=0.82 (from flask-restful->nemo_toolkit[all])\n",
455
+ " Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)\n",
456
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.8/52.8 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
457
+ "\u001b[?25hRequirement already satisfied: Flask>=0.8 in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2.2.5)\n",
458
+ "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2023.4)\n",
459
+ "Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from ftfy->nemo_toolkit[all]) (0.2.13)\n",
460
+ "Collecting distance>=0.1.3 (from g2p-en->nemo_toolkit[all])\n",
461
+ " Downloading Distance-0.1.3.tar.gz (180 kB)\n",
462
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.3/180.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
463
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
464
+ "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->nemo_toolkit[all]) (2.7.1)\n",
465
+ "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown->nemo_toolkit[all]) (4.12.3)\n",
466
+ "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.5.6)\n",
467
+ "Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
468
+ "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.7.1)\n",
469
+ "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.6.6)\n",
470
+ "Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (7.34.0)\n",
471
+ "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.0.10)\n",
472
+ "Collecting kornia-rs>=0.1.0 (from kornia->nemo_toolkit[all])\n",
473
+ " Downloading kornia_rs-0.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
474
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m89.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
475
+ "\u001b[?25hCollecting cdifflib (from nemo-text-processing->nemo_toolkit[all])\n",
476
+ " Downloading cdifflib-1.2.6.tar.gz (11 kB)\n",
477
+ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
478
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
479
+ " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
480
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
481
+ "Collecting pynini==2.1.5 (from nemo-text-processing->nemo_toolkit[all])\n",
482
+ " Downloading pynini-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161.3 MB)\n",
483
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 MB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
484
+ "\u001b[?25hRequirement already satisfied: Cython>=0.29 in /usr/local/lib/python3.10/dist-packages (from pynini==2.1.5->nemo-text-processing->nemo_toolkit[all]) (3.0.10)\n",
485
+ "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from open-clip-torch->nemo_toolkit[all]) (0.18.0+cu121)\n",
486
+ "Collecting timm (from open-clip-torch->nemo_toolkit[all])\n",
487
+ " Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)\n",
488
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m85.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
489
+ "\u001b[?25hRequirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->nemo_toolkit[all]) (2024.1)\n",
490
+ "Requirement already satisfied: sortedcontainers>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pyannote.core->nemo_toolkit[all]) (2.4.0)\n",
491
+ "Collecting pyannote.database>=4.0.1 (from pyannote.metrics->nemo_toolkit[all])\n",
492
+ " Downloading pyannote.database-5.1.0-py3-none-any.whl (48 kB)\n",
493
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.1/48.1 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
494
+ "\u001b[?25hCollecting docopt>=0.6.2 (from pyannote.metrics->nemo_toolkit[all])\n",
495
+ " Downloading docopt-0.6.2.tar.gz (25 kB)\n",
496
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
497
+ "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.10/dist-packages (from pyloudnorm->nemo_toolkit[all]) (0.18.3)\n",
498
+ "Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (2.0.0)\n",
499
+ "Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.5.0)\n",
500
+ "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.2.1)\n",
501
+ "Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->nemo_toolkit[all])\n",
502
+ " Downloading ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (526 kB)\n",
503
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
504
+ "\u001b[?25hCollecting portalocker (from sacrebleu->nemo_toolkit[all])\n",
505
+ " Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
506
+ "Collecting colorama (from sacrebleu->nemo_toolkit[all])\n",
507
+ " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
508
+ "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu->nemo_toolkit[all]) (4.9.4)\n",
509
+ "Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.8)\n",
510
+ "Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.6)\n",
511
+ "Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.1)\n",
512
+ "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.0.5)\n",
513
+ "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.1.10)\n",
514
+ "Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.7)\n",
515
+ "Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.16.1)\n",
516
+ "Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.18.1)\n",
517
+ "Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.2.0)\n",
518
+ "Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.15.0)\n",
519
+ "Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.7.16)\n",
520
+ "Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.4.1)\n",
521
+ "Collecting docutils<0.19,>=0.14 (from sphinx->nemo_toolkit[all])\n",
522
+ " Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)\n",
523
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m575.5/575.5 kB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
524
+ "\u001b[?25hCollecting pybtex>=0.24 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
525
+ " Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)\n",
526
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m561.4/561.4 kB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
527
+ "\u001b[?25hCollecting pybtex-docutils>=1.0.0 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
528
+ " Downloading pybtex_docutils-1.0.3-py3-none-any.whl (6.4 kB)\n",
529
+ "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.64.0)\n",
530
+ "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (2.27.0)\n",
531
+ "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.2.0)\n",
532
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.6)\n",
533
+ "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (0.7.2)\n",
534
+ "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.0.3)\n",
535
+ "Collecting plac (from texterrors->nemo_toolkit[all])\n",
536
+ " Downloading plac-1.4.3-py2.py3-none-any.whl (22 kB)\n",
537
+ "Collecting loguru (from texterrors->nemo_toolkit[all])\n",
538
+ " Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n",
539
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
540
+ "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from texterrors->nemo_toolkit[all]) (2.4.0)\n",
541
+ "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
542
+ " Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
543
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
544
+ "\u001b[?25hCollecting trampoline>=0.1.2 (from torchsde->nemo_toolkit[all])\n",
545
+ " Downloading trampoline-0.1.2-py3-none-any.whl (5.2 kB)\n",
546
+ "Collecting docker-pycreds>=0.4.0 (from wandb->nemo_toolkit[all])\n",
547
+ " Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
548
+ "Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->nemo_toolkit[all])\n",
549
+ " Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
550
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
551
+ "\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->nemo_toolkit[all]) (5.9.5)\n",
552
+ "Collecting sentry-sdk>=1.0.0 (from wandb->nemo_toolkit[all])\n",
553
+ " Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)\n",
554
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
555
+ "\u001b[?25hCollecting setproctitle (from wandb->nemo_toolkit[all])\n",
556
+ " Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
557
+ "Collecting asciitree (from zarr->nemo_toolkit[all])\n",
558
+ " Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n",
559
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
560
+ "Collecting numcodecs>=0.10.0 (from zarr->nemo_toolkit[all])\n",
561
+ " Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n",
562
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m63.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
563
+ "\u001b[?25hCollecting fasteners (from zarr->nemo_toolkit[all])\n",
564
+ " Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n",
565
+ "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.113->boto3->nemo_toolkit[all]) (2.0.7)\n",
566
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->nemo_toolkit[all]) (2.22)\n",
567
+ "Requirement already satisfied: toolz>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from cytoolz>=0.10.1->lhotse>=1.22.0->nemo_toolkit[all]) (0.12.1)\n",
568
+ "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask>=0.8->flask-restful->nemo_toolkit[all]) (2.2.0)\n",
569
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.3.1)\n",
570
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (23.2.0)\n",
571
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.4.1)\n",
572
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (6.0.5)\n",
573
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.9.4)\n",
574
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (4.0.3)\n",
575
+ "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
576
+ " Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
577
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
578
+ "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (5.3.3)\n",
579
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.4.0)\n",
580
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (4.9)\n",
581
+ "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (1.3.1)\n",
582
+ "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.1.12)\n",
583
+ "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.3.3)\n",
584
+ "Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all])\n",
585
+ " Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n",
586
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m73.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
587
+ "\u001b[?25hRequirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.5)\n",
588
+ "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (3.0.43)\n",
589
+ "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
590
+ "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.1.7)\n",
591
+ "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (4.9.0)\n",
592
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->nemo_toolkit[all]) (2.1.5)\n",
593
+ "Collecting typer>=0.12.1 (from pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
594
+ " Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
595
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
596
+ "\u001b[?25hCollecting latexcodec>=1.0.4 (from pybtex>=0.24->sphinxcontrib-bibtex->nemo_toolkit[all])\n",
597
+ " Downloading latexcodec-3.0.0-py3-none-any.whl (18 kB)\n",
598
+ "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (0.7.0)\n",
599
+ "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (2.18.2)\n",
600
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.3.2)\n",
601
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.7)\n",
602
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (2024.2.2)\n",
603
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (3.0.0)\n",
604
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->nemo_toolkit[all]) (1.3.0)\n",
605
+ "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.5)\n",
606
+ "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown->nemo_toolkit[all]) (2.5)\n",
607
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers>=0.19.3->nemo_toolkit[all]) (3.18.2)\n",
608
+ "INFO: pip is looking at multiple versions of levenshtein to determine which version is compatible with other requirements. This could take a while.\n",
609
+ "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
610
+ " Downloading Levenshtein-0.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
611
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
612
+ "\u001b[?25h Downloading Levenshtein-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
613
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
614
+ "\u001b[?25h Downloading Levenshtein-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB)\n",
615
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.4/169.4 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
616
+ "\u001b[?25h Downloading Levenshtein-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)\n",
617
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.9/172.9 kB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
618
+ "\u001b[?25hRequirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (1.7.1)\n",
619
+ "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
620
+ " Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
621
+ "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
622
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (0.1.2)\n",
623
+ "Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (24.0.1)\n",
624
+ "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (23.1.0)\n",
625
+ "Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.7.2)\n",
626
+ "Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.10.4)\n",
627
+ "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.4)\n",
628
+ "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.6.0)\n",
629
+ "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.3)\n",
630
+ "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
631
+ "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.20.0)\n",
632
+ "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.0.0)\n",
633
+ "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.0)\n",
634
+ "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.6.0)\n",
635
+ "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (3.2.2)\n",
636
+ "Collecting shellingham>=1.3.0 (from typer>=0.12.1->pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
637
+ " Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
638
+ "Requirement already satisfied: jupyter-server>=1.8 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.24.0)\n",
639
+ "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.2.4)\n",
640
+ "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.1.0)\n",
641
+ "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.7.1)\n",
642
+ "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.4)\n",
643
+ "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.3.0)\n",
644
+ "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
645
+ "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.10.0)\n",
646
+ "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.5.1)\n",
647
+ "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.0)\n",
648
+ "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2.19.1)\n",
649
+ "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (4.19.2)\n",
650
+ "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (21.2.0)\n",
651
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2023.12.1)\n",
652
+ "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.35.1)\n",
653
+ "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
654
+ "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (3.7.1)\n",
655
+ "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.0)\n",
656
+ "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.5.1)\n",
657
+ "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.1)\n",
658
+ "Building wheels for collected packages: causal-conv1d, antlr4-python3-runtime, progress, clip, fasttext, kaldi-python-io, nemo_toolkit, rouge-score, sox, distance, docopt, intervaltree, asciitree, cdifflib\n",
659
+ " Building wheel for causal-conv1d (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
660
+ " Created wheel for causal-conv1d: filename=causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl size=103643300 sha256=2bba8823ae89bd79c2d067978e0e533fab8298f69855bfc5d199828b278cf66c\n",
661
+ " Stored in directory: /root/.cache/pip/wheels/22/a7/db/0c9482dec3707ad23181b0eb2da40e4b8f26aaed49752fc49f\n",
662
+ " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
663
+ " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=d26518c128f80048ec70721551489517353867c7668a281f27cf1a20b9acd114\n",
664
+ " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
665
+ " Building wheel for progress (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
666
+ " Created wheel for progress: filename=progress-1.6-py3-none-any.whl size=9614 sha256=87c634c79d4e56e317499682766011b5d0e28953e43f6a3754957d0f4fd3633a\n",
667
+ " Stored in directory: /root/.cache/pip/wheels/a2/68/5f/c339b20a41659d856c93ccdce6a33095493eb82c3964aac5a1\n",
668
+ " Building wheel for clip (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
669
+ " Created wheel for clip: filename=clip-0.2.0-py3-none-any.whl size=6989 sha256=3e9ac01ba0eff273ea70feaf80d486a07683956515496b6dfeeafe81c9caae24\n",
670
+ " Stored in directory: /root/.cache/pip/wheels/7f/5c/e6/2c0fdb453a3569188864b17e9676bea8b3b7e160c037117869\n",
671
+ " Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
672
+ " Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4227140 sha256=708a73871f9ae384ea66b706bb0b73b6c624f23ce0d19882b6711b31abed8091\n",
673
+ " Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394\n",
674
+ " Building wheel for kaldi-python-io (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
675
+ " Created wheel for kaldi-python-io: filename=kaldi_python_io-1.2.2-py3-none-any.whl size=8949 sha256=5399346b043c1ae3d7431729bbd34a5206a1bbe26c41b5ba69d2b45879740d55\n",
676
+ " Stored in directory: /root/.cache/pip/wheels/b7/23/5f/49d3a826be576faf61d84e8028e1914bb36a5586ee2613b087\n",
677
+ " Building wheel for nemo_toolkit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
678
+ " Created wheel for nemo_toolkit: filename=nemo_toolkit-2.0.0rc1-py3-none-any.whl size=3709778 sha256=3eb9e4278cef98370e97bf7cc0f009cdebbdaaf8fac7a6584289fdd8abfbd8c8\n",
679
+ " Stored in directory: /tmp/pip-ephem-wheel-cache-sdhc6zr1/wheels/c3/4e/45/ab3d29aa73df619f27b371cacf809d5330a18f794879163c1b\n",
680
+ " Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
681
+ " Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=78515a9f3e94b274e69e68c059af462dc7cc1b10b51c1b6d419704ea6b4cffe5\n",
682
+ " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
683
+ " Building wheel for sox (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
684
+ " Created wheel for sox: filename=sox-1.5.0-py3-none-any.whl size=40038 sha256=1c48c5456291b6b4859918dc81caa53229a5114b68c2772f1a5518f6c6a21254\n",
685
+ " Stored in directory: /root/.cache/pip/wheels/74/e7/7b/8033be3ec5e4994595d01269fc9657c8fd83a0dcbf8536666a\n",
686
+ " Building wheel for distance (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
687
+ " Created wheel for distance: filename=Distance-0.1.3-py3-none-any.whl size=16258 sha256=ade70730449fb839934e857bdcddc6de204e5eaab05db259da2f85be3fc099d0\n",
688
+ " Stored in directory: /root/.cache/pip/wheels/e8/bb/de/f71bf63559ea9a921059a5405806f7ff6ed612a9231c4a9309\n",
689
+ " Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
690
+ " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=1aaae6b0427604326f67708418c3010e0f969b8a82ae512f79307f3978f09f52\n",
691
+ " Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n",
692
+ " Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
693
+ " Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26096 sha256=f306547725eb9ea7e52d4b78e8d49734164aa4dc43faee9a74ebd91087a42b68\n",
694
+ " Stored in directory: /root/.cache/pip/wheels/fa/80/8c/43488a924a046b733b64de3fac99252674c892a4c3801c0a61\n",
695
+ " Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
696
+ " Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=0251fcc8a18991f12d3209f3acf225199c31dee41236f40b77b69fde95038da9\n",
697
+ " Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n",
698
+ " Building wheel for cdifflib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
699
+ " Created wheel for cdifflib: filename=cdifflib-1.2.6-cp310-cp310-linux_x86_64.whl size=27681 sha256=7c7f9bf50579f19875573df405249c5c962045cdeb20eea6469b5c2d2defb0ce\n",
700
+ " Stored in directory: /root/.cache/pip/wheels/87/a7/fd/8061e24ed08689045cb6d1ca303768dc463b20a5a338174841\n",
701
+ "Successfully built causal-conv1d antlr4-python3-runtime progress clip fasttext kaldi-python-io nemo_toolkit rouge-score sox distance docopt intervaltree asciitree cdifflib\n",
702
+ "Installing collected packages: trampoline, pydub, progress, plac, pangu, opencc, ninja, ijson, docopt, distance, clip, braceexpand, asciitree, antlr4-python3-runtime, aniso8601, addict, xxhash, webdataset, trimesh, textdistance, sox, smmap, shellingham, setproctitle, sentry-sdk, ruamel.yaml.clib, rapidfuzz, pytest-runner, pypinyin, pynini, pybind11, portalocker, pathspec, parameterized, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numcodecs, mypy-extensions, marshmallow, markdown2, loguru, lilcom, lightning-utilities, libcst, latexcodec, kornia-rs, kaldiio, kaldi-python-io, jmespath, jedi, isort, intervaltree, ftfy, fasteners, faiss-cpu, einops, docutils, docker-pycreds, dill, decord, cytoolz, colorama, click, cdifflib, attrdict, zarr, sacremoses, sacrebleu, ruamel.yaml, resampy, pytest-mock, pypinyin-dict, PyMCubes, pyloudnorm, pybtex, pyannote.core, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, Levenshtein, jiwer, hydra-core, gitdb, fiddle, fasttext, einops-exts, botocore, black, typer, texterrors, s3transfer, rouge-score, pybtex-docutils, nvidia-cusolver-cu12, lhotse, gitpython, flask-restful, diffusers, wandb, transformers, sphinxcontrib-bibtex, pyannote.database, g2p-en, datasets, boto3, torchsde, torchmetrics, torchdiffeq, sentence-transformers, pyannote.metrics, nerfacc, nemo_toolkit, nemo-text-processing, kornia, causal-conv1d, accelerated-scan, timm, pytorch-lightning, taming-transformers, open-clip-torch\n",
703
+ " Attempting uninstall: docutils\n",
704
+ " Found existing installation: docutils 0.18.1\n",
705
+ " Uninstalling docutils-0.18.1:\n",
706
+ " Successfully uninstalled docutils-0.18.1\n",
707
+ " Attempting uninstall: click\n",
708
+ " Found existing installation: click 8.1.7\n",
709
+ " Uninstalling click-8.1.7:\n",
710
+ " Successfully uninstalled click-8.1.7\n",
711
+ " Attempting uninstall: typer\n",
712
+ " Found existing installation: typer 0.9.4\n",
713
+ " Uninstalling typer-0.9.4:\n",
714
+ " Successfully uninstalled typer-0.9.4\n",
715
+ " Attempting uninstall: transformers\n",
716
+ " Found existing installation: transformers 4.41.0\n",
717
+ " Uninstalling transformers-4.41.0:\n",
718
+ " Successfully uninstalled transformers-4.41.0\n",
719
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
720
+ "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
721
+ "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
722
+ "\u001b[0mSuccessfully installed Levenshtein-0.22.0 PyMCubes-0.1.4 accelerated-scan-0.2.0 addict-2.4.0 aniso8601-9.0.1 antlr4-python3-runtime-4.9.3 asciitree-0.3.3 attrdict-2.0.1 black-24.4.2 boto3-1.34.113 botocore-1.34.113 braceexpand-0.1.7 causal-conv1d-1.2.2.post1 cdifflib-1.2.6 click-8.0.2 clip-0.2.0 colorama-0.4.6 cytoolz-0.12.3 datasets-2.19.1 decord-0.6.0 diffusers-0.28.0 dill-0.3.8 distance-0.1.3 docker-pycreds-0.4.0 docopt-0.6.2 docutils-0.17.1 einops-0.8.0 einops-exts-0.0.4 faiss-cpu-1.8.0 fasteners-0.19 fasttext-0.9.2 fiddle-0.3.0 flask-restful-0.3.10 ftfy-6.2.0 g2p-en-2.1.0 gitdb-4.0.11 gitpython-3.1.43 hydra-core-1.3.2 ijson-3.2.3 intervaltree-3.1.0 isort-5.13.2 jedi-0.19.1 jiwer-2.5.2 jmespath-1.0.1 kaldi-python-io-1.2.2 kaldiio-2.18.0 kornia-0.7.2 kornia-rs-0.1.3 latexcodec-3.0.0 lhotse-1.23.0 libcst-1.4.0 lightning-utilities-0.11.2 lilcom-1.7 loguru-0.7.2 markdown2-2.4.13 marshmallow-3.21.2 multiprocess-0.70.16 mypy-extensions-1.0.0 nemo-text-processing-1.0.2 nemo_toolkit-2.0.0rc1 nerfacc-0.5.3 ninja-1.11.1.1 numcodecs-0.12.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 open-clip-torch-2.24.0 opencc-1.1.6 pangu-4.0.6.1 parameterized-0.9.0 pathspec-0.12.1 plac-1.4.3 portalocker-2.8.2 progress-1.6 pyannote.core-5.0.0 pyannote.database-5.1.0 pyannote.metrics-3.2.1 pybind11-2.12.0 pybtex-0.24.0 pybtex-docutils-1.0.3 pydub-0.25.1 pyloudnorm-0.1.1 pynini-2.1.5 pypinyin-0.51.0 pypinyin-dict-0.8.0 pytest-mock-3.14.0 pytest-runner-6.0.1 pytorch-lightning-2.2.5 rapidfuzz-2.13.7 resampy-0.4.3 rouge-score-0.1.2 ruamel.yaml-0.18.6 ruamel.yaml.clib-0.2.8 s3transfer-0.10.1 sacrebleu-2.4.2 sacremoses-0.1.1 
sentence-transformers-2.7.0 sentry-sdk-2.3.1 setproctitle-1.3.3 shellingham-1.5.4 smmap-5.0.1 sox-1.5.0 sphinxcontrib-bibtex-2.6.2 taming-transformers-0.0.1 textdistance-4.6.2 texterrors-0.4.4 timm-1.0.3 torchdiffeq-0.2.3 torchmetrics-1.4.0.post0 torchsde-0.2.6 trampoline-0.1.2 transformers-4.40.2 trimesh-4.4.0 typer-0.12.3 wandb-0.17.0 webdataset-0.2.86 xxhash-3.4.1 zarr-2.18.2\n"
723
+ ]
724
+ }
725
+ ],
726
+ "source": [
727
+ "!pip install wget\n",
728
+ "!apt-get install sox libsndfile1 ffmpeg\n",
729
+ "!pip install \"matplotlib>=3.3.2\"  # quoted: an unquoted '>' is a shell redirect, not a version bound\n",
730
+ "\n",
731
+ "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
732
+ ]
733
+ },
734
+ {
735
+ "cell_type": "code",
736
+ "execution_count": null,
737
+ "metadata": {
738
+ "id": "_Utv8kLRW9Js"
739
+ },
740
+ "outputs": [],
741
+ "source": [
742
+ "from typing import List, Union\n",
743
+ "\n",
744
+ "import hydra\n",
745
+ "import soundfile as sf\n",
746
+ "import torch\n",
747
+ "from omegaconf import DictConfig, ListConfig, OmegaConf"
748
+ ]
749
+ },
750
+ {
751
+ "cell_type": "markdown",
752
+ "metadata": {
753
+ "id": "ZLslfbEfXQIE"
754
+ },
755
+ "source": [
756
+ "### Model for emotion classification"
757
+ ]
758
+ },
759
+ {
760
+ "cell_type": "code",
761
+ "execution_count": null,
762
+ "metadata": {
763
+ "id": "paEKSFFVXPqC"
764
+ },
765
+ "outputs": [],
766
+ "source": [
767
+ "class SpecScaler(torch.nn.Module):\n",
768
+ "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
769
+ "        return torch.log(x.clamp_(1e-9, 1e9))  # NOTE: clamp_ modifies x in place before taking the log\n",
770
+ "\n",
771
+ "\n",
772
+ "class GigaAMEmo(torch.nn.Module):\n",
773
+ "    def __init__(self, conf: Union[DictConfig, ListConfig]):\n",
774
+ "        super().__init__()\n",
775
+ "        self.id2name = conf.id2name\n",
776
+ "        self.feature_extractor = hydra.utils.instantiate(conf.feature_extractor)\n",
777
+ "        self.conformer = hydra.utils.instantiate(conf.encoder)\n",
778
+ "        self.linear_head = hydra.utils.instantiate(conf.classification_head)\n",
779
+ "\n",
780
+ "    def forward(self, features, features_length=None):\n",
781
+ "        if features.dim() == 2:  # single unbatched input: add a leading batch axis\n",
782
+ "            features = features.unsqueeze(0)\n",
783
+ "        if features_length is None:  # explicit None check: truth-testing a multi-element tensor raises\n",
784
+ "            features_length = torch.ones(features.shape[0]) * features.shape[-1]\n",
785
+ "            features_length = features_length.to(features.device)\n",
786
+ "        encoded, _ = self.conformer(audio_signal=features, length=features_length)\n",
787
+ "        encoded_pooled = torch.nn.functional.avg_pool1d(\n",
788
+ "            encoded, kernel_size=encoded.shape[-1]\n",
789
+ "        ).squeeze(-1)\n",
790
+ "\n",
791
+ "        logits = self.linear_head(encoded_pooled)\n",
792
+ "        return logits\n",
793
+ "\n",
794
+ "    def get_probs(self, audio_path: str) -> List[List[float]]:\n",
795
+ "        audio_signal, _ = sf.read(audio_path, dtype=\"float32\")\n",
796
+ "        features = self.feature_extractor(\n",
797
+ "            torch.tensor(audio_signal).float().to(next(self.parameters()).device)\n",
798
+ "        )\n",
799
+ "        logits = self.forward(features)\n",
800
+ "        probs = torch.nn.functional.softmax(logits, dim=-1).detach().tolist()  # normalise over the last (class) axis\n",
801
+ "        return probs"
802
+ ]
803
+ },
804
+ {
805
+ "cell_type": "markdown",
806
+ "metadata": {
807
+ "id": "7UFpN0Ghc244"
808
+ },
809
+ "source": [
810
+ "### Downloading config, weights and audio example"
811
+ ]
812
+ },
813
+ {
814
+ "cell_type": "code",
815
+ "execution_count": null,
816
+ "metadata": {
817
+ "colab": {
818
+ "base_uri": "https://localhost:8080/"
819
+ },
820
+ "id": "jFZJGISjcmHW",
821
+ "outputId": "74a2a71e-2dba-4551-c2cb-737eaa35bfa4"
822
+ },
823
+ "outputs": [
824
+ {
825
+ "name": "stdout",
826
+ "output_type": "stream",
827
+ "text": [
828
+ "--2024-05-28 07:10:07-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt\n",
829
+ "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
830
+ "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
831
+ "HTTP request sent, awaiting response... 200 OK\n",
832
+ "Length: 968409626 (924M) [application/octet-stream]\n",
833
+ "Saving to: ‘emo_model_weights.ckpt’\n",
834
+ "\n",
835
+ "emo_model_weights.c 100%[===================>] 923.55M 7.48MB/s in 1m 45s \n",
836
+ "\n",
837
+ "2024-05-28 07:11:53 (8.82 MB/s) - ‘emo_model_weights.ckpt’ saved [968409626/968409626]\n",
838
+ "\n",
839
+ "--2024-05-28 07:11:54-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml\n",
840
+ "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
841
+ "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
842
+ "HTTP request sent, awaiting response... 200 OK\n",
843
+ "Length: 765 [application/octet-stream]\n",
844
+ "Saving to: ‘emo_model_config.yaml’\n",
845
+ "\n",
846
+ "emo_model_config.ya 100%[===================>] 765 --.-KB/s in 0s \n",
847
+ "\n",
848
+ "2024-05-28 07:11:54 (252 MB/s) - ‘emo_model_config.yaml’ saved [765/765]\n",
849
+ "\n",
850
+ "--2024-05-28 07:11:54-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav\n",
851
+ "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
852
+ "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
853
+ "HTTP request sent, awaiting response... 200 OK\n",
854
+ "Length: 361324 (353K) [application/octet-stream]\n",
855
+ "Saving to: ‘example.wav’\n",
856
+ "\n",
857
+ "example.wav 100%[===================>] 352.86K 715KB/s in 0.5s \n",
858
+ "\n",
859
+ "2024-05-28 07:11:56 (715 KB/s) - ‘example.wav’ saved [361324/361324]\n",
860
+ "\n"
861
+ ]
862
+ }
863
+ ],
864
+ "source": [
865
+ "import locale\n",
866
+ "\n",
867
+ "locale.getpreferredencoding = lambda: \"UTF-8\"\n",
868
+ "\n",
869
+ "# Loading weights, config and example wav for the emotion classification model\n",
870
+ "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt\n",
871
+ "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml\n",
872
+ "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav"
873
+ ]
874
+ },
875
+ {
876
+ "cell_type": "markdown",
877
+ "metadata": {
878
+ "id": "NZ6-O2M0fxDY"
879
+ },
880
+ "source": [
881
+ "### Model instantiation and inference"
882
+ ]
883
+ },
884
+ {
885
+ "cell_type": "code",
886
+ "execution_count": null,
887
+ "metadata": {
888
+ "colab": {
889
+ "base_uri": "https://localhost:8080/"
890
+ },
891
+ "id": "plXt8297d5km",
892
+ "outputId": "537acb90-f6a1-4a73-ea66-0d3fe6fd9a3a"
893
+ },
894
+ "outputs": [
895
+ {
896
+ "name": "stderr",
897
+ "output_type": "stream",
898
+ "text": [
899
+ "[NeMo W 2024-05-28 07:20:12 nemo_logging:349] <ipython-input-3-06a5deda234a>:32: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
900
+ " probs = torch.nn.functional.softmax(logits).detach().tolist()\n",
901
+ " \n"
902
+ ]
903
+ },
904
+ {
905
+ "name": "stdout",
906
+ "output_type": "stream",
907
+ "text": [
908
+ "angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074\n"
909
+ ]
910
+ }
911
+ ],
912
+ "source": [
913
+ "model_config = \"emo_model_config.yaml\"\n",
914
+ "model_weights = \"emo_model_weights.ckpt\"\n",
915
+ "audio_path = \"example.wav\"\n",
916
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
917
+ "\n",
918
+ "conf = OmegaConf.load(model_config)\n",
919
+ "model = GigaAMEmo(conf)\n",
920
+ "ckpt = torch.load(model_weights, map_location=\"cpu\")\n",
921
+ "model.load_state_dict(ckpt, strict=False)\n",
922
+ "model = model.to(device)\n",
923
+ "model.eval()\n",
924
+ "with torch.no_grad():\n",
925
+ " probs = model.get_probs(audio_path)[0]\n",
926
+ "print(\", \".join([f\"{model.id2name[i]}: {p:.3f}\" for i, p in enumerate(probs)]))"
927
+ ]
928
+ },
929
+ {
930
+ "cell_type": "code",
931
+ "execution_count": null,
932
+ "metadata": {
933
+ "id": "nmFvC_GfkasV"
934
+ },
935
+ "outputs": [],
936
+ "source": []
937
+ }
938
+ ],
939
+ "metadata": {
940
+ "accelerator": "GPU",
941
+ "colab": {
942
+ "gpuType": "T4",
943
+ "provenance": []
944
+ },
945
+ "kernelspec": {
946
+ "display_name": "Python 3",
947
+ "name": "python3"
948
+ },
949
+ "language_info": {
950
+ "name": "python"
951
+ }
952
+ },
953
+ "nbformat": 4,
954
+ "nbformat_minor": 0
955
+ }
Examples/notebooks/GigaAM_Model_Usage_Example.ipynb ADDED
@@ -0,0 +1,881 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "source": [
22
+ "### Installing and importing dependencies"
23
+ ],
24
+ "metadata": {
25
+ "id": "aqymJFVQhere"
26
+ }
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "metadata": {
32
+ "colab": {
33
+ "base_uri": "https://localhost:8080/"
34
+ },
35
+ "id": "mJ5zzajTbzRX",
36
+ "outputId": "a6e8f1cc-5ef7-43e2-824e-39133c8f3f98"
37
+ },
38
+ "outputs": [
39
+ {
40
+ "output_type": "stream",
41
+ "name": "stdout",
42
+ "text": [
43
+ "Collecting wget\n",
44
+ " Downloading wget-3.2.zip (10 kB)\n",
45
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
46
+ "Building wheels for collected packages: wget\n",
47
+ " Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
48
+ " Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=fb233af0965c5da90b8babdcb0fbd51095c2a135ec877618013ed9078dced85b\n",
49
+ " Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n",
50
+ "Successfully built wget\n",
51
+ "Installing collected packages: wget\n",
52
+ "Successfully installed wget-3.2\n",
53
+ "Reading package lists... Done\n",
54
+ "Building dependency tree... Done\n",
55
+ "Reading state information... Done\n",
56
+ "libsndfile1 is already the newest version (1.0.31-2ubuntu0.1).\n",
57
+ "ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n",
58
+ "The following additional packages will be installed:\n",
59
+ " libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1\n",
60
+ "Suggested packages:\n",
61
+ " libsox-fmt-all\n",
62
+ "The following NEW packages will be installed:\n",
63
+ " libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1 sox\n",
64
+ "0 upgraded, 7 newly installed, 0 to remove and 45 not upgraded.\n",
65
+ "Need to get 617 kB of archives.\n",
66
+ "After this operation, 1,764 kB of additional disk space will be used.\n",
67
+ "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n",
68
+ "Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n",
69
+ "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [240 kB]\n",
70
+ "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [11.2 kB]\n",
71
+ "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n",
72
+ "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [33.7 kB]\n",
73
+ "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [104 kB]\n",
74
+ "Fetched 617 kB in 0s (2,171 kB/s)\n",
75
+ "Selecting previously unselected package libopencore-amrnb0:amd64.\n",
76
+ "(Reading database ... 121918 files and directories currently installed.)\n",
77
+ "Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n",
78
+ "Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
79
+ "Selecting previously unselected package libopencore-amrwb0:amd64.\n",
80
+ "Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n",
81
+ "Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
82
+ "Selecting previously unselected package libsox3:amd64.\n",
83
+ "Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
84
+ "Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
85
+ "Selecting previously unselected package libsox-fmt-alsa:amd64.\n",
86
+ "Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
87
+ "Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
88
+ "Selecting previously unselected package libwavpack1:amd64.\n",
89
+ "Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n",
90
+ "Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n",
91
+ "Selecting previously unselected package libsox-fmt-base:amd64.\n",
92
+ "Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
93
+ "Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
94
+ "Selecting previously unselected package sox.\n",
95
+ "Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
96
+ "Unpacking sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
97
+ "Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
98
+ "Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
99
+ "Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
100
+ "Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n",
101
+ "Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
102
+ "Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
103
+ "Setting up sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
104
+ "Processing triggers for man-db (2.10.2-1) ...\n",
105
+ "Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n",
106
+ "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
107
+ "\n",
108
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
109
+ "\n",
110
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
111
+ "\n",
112
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
113
+ "\n",
114
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
115
+ "\n",
116
+ "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
117
+ "\n",
118
+ "\u001b[33mDEPRECATION: git+https://github.com/NVIDIA/NeMo.git#egg=nemo_toolkit[all] contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617\u001b[0m\u001b[33m\n",
119
+ "\u001b[0mCollecting nemo_toolkit[all]\n",
120
+ " Cloning https://github.com/NVIDIA/NeMo.git to /tmp/pip-install-rgi4yev1/nemo-toolkit_1a843c4761a042a6a6b3b0dfbb81352c\n",
121
+ " Running command git clone --filter=blob:none --quiet https://github.com/NVIDIA/NeMo.git /tmp/pip-install-rgi4yev1/nemo-toolkit_1a843c4761a042a6a6b3b0dfbb81352c\n",
122
+ " Resolved https://github.com/NVIDIA/NeMo.git to commit 1fa961ba03ab5f8c91b278640e29807079373372\n",
123
+ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
124
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
125
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
126
+ "Collecting fiddle (from nemo_toolkit[all])\n",
127
+ " Downloading fiddle-0.3.0-py3-none-any.whl (419 kB)\n",
128
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m419.8/419.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
129
+ "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.23.1)\n",
130
+ "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.58.1)\n",
131
+ "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.25.2)\n",
132
+ "Collecting onnx>=1.7.0 (from nemo_toolkit[all])\n",
133
+ " Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
134
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
135
+ "\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.8.2)\n",
136
+ "Collecting ruamel.yaml (from nemo_toolkit[all])\n",
137
+ " Downloading ruamel.yaml-0.18.6-py3-none-any.whl (117 kB)\n",
138
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.8/117.8 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
139
+ "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.2.2)\n",
140
+ "Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (67.7.2)\n",
141
+ "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.15.2)\n",
142
+ "Requirement already satisfied: text-unidecode in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.3)\n",
143
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.3.0+cu121)\n",
144
+ "Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (4.66.4)\n",
145
+ "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.2)\n",
146
+ "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.14.1)\n",
147
+ "Collecting black~=24.3 (from nemo_toolkit[all])\n",
148
+ " Downloading black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n",
149
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m56.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
150
+ "\u001b[?25hCollecting click==8.0.2 (from nemo_toolkit[all])\n",
151
+ " Downloading click-8.0.2-py3-none-any.whl (97 kB)\n",
152
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
153
+ "\u001b[?25hCollecting isort<6.0.0,>5.1.0 (from nemo_toolkit[all])\n",
154
+ " Downloading isort-5.13.2-py3-none-any.whl (92 kB)\n",
155
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.3/92.3 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
156
+ "\u001b[?25hCollecting parameterized (from nemo_toolkit[all])\n",
157
+ " Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)\n",
158
+ "Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.4.4)\n",
159
+ "Collecting pytest-mock (from nemo_toolkit[all])\n",
160
+ " Downloading pytest_mock-3.14.0-py3-none-any.whl (9.9 kB)\n",
161
+ "Collecting pytest-runner (from nemo_toolkit[all])\n",
162
+ " Downloading pytest_runner-6.0.1-py3-none-any.whl (7.2 kB)\n",
163
+ "Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.0.2)\n",
164
+ "Collecting sphinxcontrib-bibtex (from nemo_toolkit[all])\n",
165
+ " Downloading sphinxcontrib_bibtex-2.6.2-py3-none-any.whl (40 kB)\n",
166
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
167
+ "\u001b[?25hCollecting wandb (from nemo_toolkit[all])\n",
168
+ " Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)\n",
169
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
170
+ "\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.2.1)\n",
171
+ "Collecting hydra-core<=1.3.2,>1.3 (from nemo_toolkit[all])\n",
172
+ " Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n",
173
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
174
+ "\u001b[?25hCollecting omegaconf<=2.3 (from nemo_toolkit[all])\n",
175
+ " Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
176
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
177
+ "\u001b[?25hCollecting pytorch-lightning>=2.2.1 (from nemo_toolkit[all])\n",
178
+ " Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)\n",
179
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m802.3/802.3 kB\u001b[0m \u001b[31m47.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
180
+ "\u001b[?25hCollecting torchmetrics>=0.11.0 (from nemo_toolkit[all])\n",
181
+ " Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)\n",
182
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m868.8/868.8 kB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
183
+ "\u001b[?25hCollecting transformers<=4.40.2,>=4.36.0 (from nemo_toolkit[all])\n",
184
+ " Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)\n",
185
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m70.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
186
+ "\u001b[?25hCollecting webdataset>=0.2.86 (from nemo_toolkit[all])\n",
187
+ " Downloading webdataset-0.2.86-py3-none-any.whl (70 kB)\n",
188
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.4/70.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
189
+ "\u001b[?25hCollecting datasets (from nemo_toolkit[all])\n",
190
+ " Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n",
191
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
192
+ "\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.0.0)\n",
193
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.0.3)\n",
194
+ "Collecting sacremoses>=0.0.43 (from nemo_toolkit[all])\n",
195
+ " Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)\n",
196
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.5/897.5 kB\u001b[0m \u001b[31m56.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
197
+ "\u001b[?25hRequirement already satisfied: sentencepiece<1.0.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.99)\n",
198
+ "Collecting braceexpand (from nemo_toolkit[all])\n",
199
+ " Downloading braceexpand-0.1.7-py2.py3-none-any.whl (5.9 kB)\n",
200
+ "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.6.2)\n",
201
+ "Collecting einops (from nemo_toolkit[all])\n",
202
+ " Downloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
203
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
204
+ "\u001b[?25hCollecting g2p-en (from nemo_toolkit[all])\n",
205
+ " Downloading g2p_en-2.1.0-py3-none-any.whl (3.1 MB)\n",
206
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m76.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
207
+ "\u001b[?25hRequirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.7.1)\n",
208
+ "Collecting jiwer (from nemo_toolkit[all])\n",
209
+ " Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)\n",
210
+ "Collecting kaldi-python-io (from nemo_toolkit[all])\n",
211
+ " Downloading kaldi-python-io-1.2.2.tar.gz (8.8 kB)\n",
212
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
213
+ "Collecting kaldiio (from nemo_toolkit[all])\n",
214
+ " Downloading kaldiio-2.18.0-py3-none-any.whl (28 kB)\n",
215
+ "Collecting lhotse>=1.22.0 (from nemo_toolkit[all])\n",
216
+ " Downloading lhotse-1.23.0-py3-none-any.whl (772 kB)\n",
217
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m772.4/772.4 kB\u001b[0m \u001b[31m50.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
218
+ "\u001b[?25hRequirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.10.2.post1)\n",
219
+ "Collecting marshmallow (from nemo_toolkit[all])\n",
220
+ " Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
221
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
222
+ "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.7.1)\n",
223
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (24.0)\n",
224
+ "Collecting pyannote.core (from nemo_toolkit[all])\n",
225
+ " Downloading pyannote.core-5.0.0-py3-none-any.whl (58 kB)\n",
226
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
227
+ "\u001b[?25hCollecting pyannote.metrics (from nemo_toolkit[all])\n",
228
+ " Downloading pyannote.metrics-3.2.1-py3-none-any.whl (51 kB)\n",
229
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.4/51.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
230
+ "\u001b[?25hCollecting pydub (from nemo_toolkit[all])\n",
231
+ " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
232
+ "Collecting pyloudnorm (from nemo_toolkit[all])\n",
233
+ " Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n",
234
+ "Collecting resampy (from nemo_toolkit[all])\n",
235
+ " Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)\n",
236
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m82.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
237
+ "\u001b[?25hRequirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.11.4)\n",
238
+ "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.12.1)\n",
239
+ "Collecting sox (from nemo_toolkit[all])\n",
240
+ " Downloading sox-1.5.0.tar.gz (63 kB)\n",
241
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
242
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
243
+ "Collecting texterrors (from nemo_toolkit[all])\n",
244
+ " Downloading texterrors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
245
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m58.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
246
+ "\u001b[?25hCollecting accelerated-scan (from nemo_toolkit[all])\n",
247
+ " Downloading accelerated_scan-0.2.0-py3-none-any.whl (11 kB)\n",
248
+ "Collecting boto3 (from nemo_toolkit[all])\n",
249
+ " Downloading boto3-1.34.113-py3-none-any.whl (139 kB)\n",
250
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
251
+ "\u001b[?25hCollecting causal-conv1d>=1.2.0 (from nemo_toolkit[all])\n",
252
+ " Downloading causal_conv1d-1.2.2.post1.tar.gz (7.2 kB)\n",
253
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
254
+ "Collecting faiss-cpu (from nemo_toolkit[all])\n",
255
+ " Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
256
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m41.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
257
+ "\u001b[?25hCollecting fasttext (from nemo_toolkit[all])\n",
258
+ " Downloading fasttext-0.9.2.tar.gz (68 kB)\n",
259
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.8/68.8 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
260
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
261
+ "Collecting flask-restful (from nemo_toolkit[all])\n",
262
+ " Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl (26 kB)\n",
263
+ "Collecting ftfy (from nemo_toolkit[all])\n",
264
+ " Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)\n",
265
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.4/54.4 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
266
+ "\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.1.0)\n",
267
+ "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.9.0)\n",
268
+ "Collecting ijson (from nemo_toolkit[all])\n",
269
+ " Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n",
270
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
271
+ "\u001b[?25hRequirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.42.1)\n",
272
+ "Collecting markdown2 (from nemo_toolkit[all])\n",
273
+ " Downloading markdown2-2.4.13-py2.py3-none-any.whl (41 kB)\n",
274
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
275
+ "\u001b[?25hRequirement already satisfied: nltk>=3.6.5 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.8.1)\n",
276
+ "Collecting opencc<1.1.7 (from nemo_toolkit[all])\n",
277
+ " Downloading OpenCC-1.1.6-cp310-cp310-manylinux1_x86_64.whl (778 kB)\n",
278
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m778.3/778.3 kB\u001b[0m \u001b[31m43.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
279
+ "\u001b[?25hCollecting pangu (from nemo_toolkit[all])\n",
280
+ " Downloading pangu-4.0.6.1-py3-none-any.whl (6.4 kB)\n",
281
+ "Collecting rapidfuzz (from nemo_toolkit[all])\n",
282
+ " Downloading rapidfuzz-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
283
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m76.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
284
+ "\u001b[?25hCollecting rouge-score (from nemo_toolkit[all])\n",
285
+ " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
286
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
287
+ "Collecting sacrebleu (from nemo_toolkit[all])\n",
288
+ " Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)\n",
289
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.7/106.7 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
290
+ "\u001b[?25hCollecting sentence-transformers (from nemo_toolkit[all])\n",
291
+ " Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n",
292
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
293
+ "\u001b[?25hRequirement already satisfied: tensorstore<0.1.46 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.45)\n",
294
+ "Collecting zarr (from nemo_toolkit[all])\n",
295
+ " Downloading zarr-2.18.2-py3-none-any.whl (210 kB)\n",
296
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.2/210.2 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
297
+ "\u001b[?25hCollecting attrdict (from nemo_toolkit[all])\n",
298
+ " Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
299
+ "Collecting kornia (from nemo_toolkit[all])\n",
300
+ " Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)\n",
301
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m825.4/825.4 kB\u001b[0m \u001b[31m54.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
302
+ "\u001b[?25hCollecting pypinyin (from nemo_toolkit[all])\n",
303
+ " Downloading pypinyin-0.51.0-py2.py3-none-any.whl (1.4 MB)\n",
304
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m62.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
305
+ "\u001b[?25hCollecting pypinyin-dict (from nemo_toolkit[all])\n",
306
+ " Downloading pypinyin_dict-0.8.0-py2.py3-none-any.whl (9.5 MB)\n",
307
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
308
+ "\u001b[?25hCollecting progress>=1.5 (from nemo_toolkit[all])\n",
309
+ " Downloading progress-1.6.tar.gz (7.8 kB)\n",
310
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
311
+ "Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.9.0)\n",
312
+ "Collecting textdistance>=4.1.5 (from nemo_toolkit[all])\n",
313
+ " Downloading textdistance-4.6.2-py3-none-any.whl (31 kB)\n",
314
+ "Collecting addict (from nemo_toolkit[all])\n",
315
+ " Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
316
+ "Collecting clip (from nemo_toolkit[all])\n",
317
+ " Downloading clip-0.2.0.tar.gz (5.5 kB)\n",
318
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
319
+ "Collecting decord (from nemo_toolkit[all])\n",
320
+ " Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
321
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m74.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
322
+ "\u001b[?25hCollecting diffusers>=0.19.3 (from nemo_toolkit[all])\n",
323
+ " Downloading diffusers-0.28.0-py3-none-any.whl (2.2 MB)\n",
324
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
325
+ "\u001b[?25hCollecting einops-exts (from nemo_toolkit[all])\n",
326
+ " Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
327
+ "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.31.6)\n",
328
+ "Collecting nerfacc>=0.5.3 (from nemo_toolkit[all])\n",
329
+ " Downloading nerfacc-0.5.3-py3-none-any.whl (54 kB)\n",
330
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
331
+ "\u001b[?25hCollecting open-clip-torch (from nemo_toolkit[all])\n",
332
+ " Downloading open_clip_torch-2.24.0-py3-none-any.whl (1.5 MB)\n",
333
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m58.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
334
+ "\u001b[?25hCollecting PyMCubes (from nemo_toolkit[all])\n",
335
+ " Downloading PyMCubes-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB)\n",
336
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.3/274.3 kB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
337
+ "\u001b[?25hCollecting taming-transformers (from nemo_toolkit[all])\n",
338
+ " Downloading taming_transformers-0.0.1-py3-none-any.whl (45 kB)\n",
339
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.6/45.6 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
340
+ "\u001b[?25hCollecting torchdiffeq (from nemo_toolkit[all])\n",
341
+ " Downloading torchdiffeq-0.2.3-py3-none-any.whl (31 kB)\n",
342
+ "Collecting torchsde (from nemo_toolkit[all])\n",
343
+ " Downloading torchsde-0.2.6-py3-none-any.whl (61 kB)\n",
344
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
345
+ "\u001b[?25hCollecting trimesh (from nemo_toolkit[all])\n",
346
+ " Downloading trimesh-4.4.0-py3-none-any.whl (694 kB)\n",
347
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m694.6/694.6 kB\u001b[0m \u001b[31m39.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
348
+ "\u001b[?25hCollecting nemo-text-processing (from nemo_toolkit[all])\n",
349
+ " Downloading nemo_text_processing-1.0.2-py3-none-any.whl (2.6 MB)\n",
350
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m44.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
351
+ "\u001b[?25hCollecting mypy-extensions>=0.4.3 (from black~=24.3->nemo_toolkit[all])\n",
352
+ " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
353
+ "Collecting pathspec>=0.9.0 (from black~=24.3->nemo_toolkit[all])\n",
354
+ " Downloading pathspec-0.12.1-py3-none-any.whl (31 kB)\n",
355
+ "Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.2.2)\n",
356
+ "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (2.0.1)\n",
357
+ "Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.11.0)\n",
358
+ "Collecting ninja (from causal-conv1d>=1.2.0->nemo_toolkit[all])\n",
359
+ " Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
360
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m25.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
361
+ "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (7.1.0)\n",
362
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (3.14.0)\n",
363
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2023.12.25)\n",
364
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2.31.0)\n",
365
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (0.4.3)\n",
366
+ "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (9.4.0)\n",
367
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (2023.6.0)\n",
368
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (6.0.1)\n",
369
+ "Collecting antlr4-python3-runtime==4.9.* (from hydra-core<=1.3.2,>1.3->nemo_toolkit[all])\n",
370
+ " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
371
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
372
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
373
+ "INFO: pip is looking at multiple versions of jiwer to determine which version is compatible with other requirements. This could take a while.\n",
374
+ "Collecting jiwer (from nemo_toolkit[all])\n",
375
+ " Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)\n",
376
+ " Downloading jiwer-3.0.2-py3-none-any.whl (21 kB)\n",
377
+ " Downloading jiwer-3.0.1-py3-none-any.whl (21 kB)\n",
378
+ " Downloading jiwer-3.0.0-py3-none-any.whl (21 kB)\n",
379
+ " Downloading jiwer-2.6.0-py3-none-any.whl (20 kB)\n",
380
+ " Downloading jiwer-2.5.2-py3-none-any.whl (15 kB)\n",
381
+ "Collecting rapidfuzz (from nemo_toolkit[all])\n",
382
+ " Downloading rapidfuzz-2.13.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
383
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m63.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
384
+ "\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from lhotse>=1.22.0->nemo_toolkit[all]) (3.0.1)\n",
385
+ "Collecting cytoolz>=0.10.1 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
386
+ " Downloading cytoolz-0.12.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
387
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m63.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
388
+ "\u001b[?25hCollecting intervaltree>=3.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
389
+ " Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
390
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
391
+ "Collecting lilcom>=1.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
392
+ " Downloading lilcom-1.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB)\n",
393
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.1/87.1 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
394
+ "\u001b[?25hRequirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.4.2)\n",
395
+ "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (4.4.2)\n",
396
+ "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.8.1)\n",
397
+ "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.3.7)\n",
398
+ "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.4)\n",
399
+ "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.0.8)\n",
400
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.2.1)\n",
401
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (0.12.1)\n",
402
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (4.51.0)\n",
403
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.4.5)\n",
404
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (3.1.2)\n",
405
+ "Requirement already satisfied: rich>=12 in /usr/local/lib/python3.10/dist-packages (from nerfacc>=0.5.3->nemo_toolkit[all]) (13.7.1)\n",
406
+ "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->nemo_toolkit[all]) (0.41.1)\n",
407
+ "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx>=1.7.0->nemo_toolkit[all]) (3.20.3)\n",
408
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->nemo_toolkit[all]) (1.16.0)\n",
409
+ "Collecting lightning-utilities>=0.8.0 (from pytorch-lightning>=2.2.1->nemo_toolkit[all])\n",
410
+ " Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)\n",
411
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->nemo_toolkit[all]) (3.5.0)\n",
412
+ "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->nemo_toolkit[all]) (1.16.0)\n",
413
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (1.12)\n",
414
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.3)\n",
415
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.1.4)\n",
416
+ "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
417
+ " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
418
+ "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
419
+ " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
420
+ "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
421
+ " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
422
+ "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->nemo_toolkit[all])\n",
423
+ " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
424
+ "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->nemo_toolkit[all])\n",
425
+ " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
426
+ "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->nemo_toolkit[all])\n",
427
+ " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
428
+ "Collecting nvidia-curand-cu12==10.3.2.106 (from torch->nemo_toolkit[all])\n",
429
+ " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
430
+ "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->nemo_toolkit[all])\n",
431
+ " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
432
+ "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->nemo_toolkit[all])\n",
433
+ " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
434
+ "Collecting nvidia-nccl-cu12==2.20.5 (from torch->nemo_toolkit[all])\n",
435
+ " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
436
+ "Collecting nvidia-nvtx-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
437
+ " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
438
+ "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (2.3.0)\n",
439
+ "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->nemo_toolkit[all])\n",
440
+ " Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
441
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m57.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
442
+ "\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<=4.40.2,>=4.36.0->nemo_toolkit[all]) (0.19.1)\n",
443
+ "Collecting botocore<1.35.0,>=1.34.113 (from boto3->nemo_toolkit[all])\n",
444
+ " Downloading botocore-1.34.113-py3-none-any.whl (12.3 MB)\n",
445
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m63.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
446
+ "\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->nemo_toolkit[all])\n",
447
+ " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
448
+ "Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->nemo_toolkit[all])\n",
449
+ " Downloading s3transfer-0.10.1-py3-none-any.whl (82 kB)\n",
450
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.2/82.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
451
+ "\u001b[?25hRequirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (14.0.2)\n",
452
+ "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (0.6)\n",
453
+ "Collecting dill<0.3.9,>=0.3.0 (from datasets->nemo_toolkit[all])\n",
454
+ " Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
455
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
456
+ "\u001b[?25hCollecting xxhash (from datasets->nemo_toolkit[all])\n",
457
+ " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
458
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
459
+ "\u001b[?25hCollecting multiprocess (from datasets->nemo_toolkit[all])\n",
460
+ " Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
461
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
462
+ "\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (3.9.5)\n",
463
+ "Collecting pybind11>=2.2 (from fasttext->nemo_toolkit[all])\n",
464
+ " Using cached pybind11-2.12.0-py3-none-any.whl (234 kB)\n",
465
+ "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (1.4.0)\n",
466
+ "Requirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (0.20.3)\n",
467
+ "Collecting libcst (from fiddle->nemo_toolkit[all])\n",
468
+ " Downloading libcst-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
469
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
470
+ "\u001b[?25hCollecting aniso8601>=0.82 (from flask-restful->nemo_toolkit[all])\n",
471
+ " Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)\n",
472
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.8/52.8 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
473
+ "\u001b[?25hRequirement already satisfied: Flask>=0.8 in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2.2.5)\n",
474
+ "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2023.4)\n",
475
+ "Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from ftfy->nemo_toolkit[all]) (0.2.13)\n",
476
+ "Collecting distance>=0.1.3 (from g2p-en->nemo_toolkit[all])\n",
477
+ " Downloading Distance-0.1.3.tar.gz (180 kB)\n",
478
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.3/180.3 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
479
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
480
+ "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->nemo_toolkit[all]) (2.7.1)\n",
481
+ "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown->nemo_toolkit[all]) (4.12.3)\n",
482
+ "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.5.6)\n",
483
+ "Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
484
+ "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.7.1)\n",
485
+ "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.6.6)\n",
486
+ "Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (7.34.0)\n",
487
+ "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.0.10)\n",
488
+ "Collecting kornia-rs>=0.1.0 (from kornia->nemo_toolkit[all])\n",
489
+ " Downloading kornia_rs-0.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
490
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m75.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
491
+ "\u001b[?25hCollecting cdifflib (from nemo-text-processing->nemo_toolkit[all])\n",
492
+ " Downloading cdifflib-1.2.6.tar.gz (11 kB)\n",
493
+ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
494
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
495
+ " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
496
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
497
+ "Collecting pynini==2.1.5 (from nemo-text-processing->nemo_toolkit[all])\n",
498
+ " Downloading pynini-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161.3 MB)\n",
499
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 MB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
500
+ "\u001b[?25hRequirement already satisfied: Cython>=0.29 in /usr/local/lib/python3.10/dist-packages (from pynini==2.1.5->nemo-text-processing->nemo_toolkit[all]) (3.0.10)\n",
501
+ "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from open-clip-torch->nemo_toolkit[all]) (0.18.0+cu121)\n",
502
+ "Collecting timm (from open-clip-torch->nemo_toolkit[all])\n",
503
+ " Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)\n",
504
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m58.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
505
+ "\u001b[?25hRequirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->nemo_toolkit[all]) (2024.1)\n",
506
+ "Requirement already satisfied: sortedcontainers>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pyannote.core->nemo_toolkit[all]) (2.4.0)\n",
507
+ "Collecting pyannote.database>=4.0.1 (from pyannote.metrics->nemo_toolkit[all])\n",
508
+ " Downloading pyannote.database-5.1.0-py3-none-any.whl (48 kB)\n",
509
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.1/48.1 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
510
+ "\u001b[?25hCollecting docopt>=0.6.2 (from pyannote.metrics->nemo_toolkit[all])\n",
511
+ " Downloading docopt-0.6.2.tar.gz (25 kB)\n",
512
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
513
+ "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.10/dist-packages (from pyloudnorm->nemo_toolkit[all]) (0.18.3)\n",
514
+ "Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (2.0.0)\n",
515
+ "Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.5.0)\n",
516
+ "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.2.1)\n",
517
+ "Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->nemo_toolkit[all])\n",
518
+ " Downloading ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (526 kB)\n",
519
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
520
+ "\u001b[?25hCollecting portalocker (from sacrebleu->nemo_toolkit[all])\n",
521
+ " Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
522
+ "Collecting colorama (from sacrebleu->nemo_toolkit[all])\n",
523
+ " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
524
+ "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu->nemo_toolkit[all]) (4.9.4)\n",
525
+ "Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.8)\n",
526
+ "Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.6)\n",
527
+ "Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.1)\n",
528
+ "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.0.5)\n",
529
+ "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.1.10)\n",
530
+ "Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.7)\n",
531
+ "Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.16.1)\n",
532
+ "Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.18.1)\n",
533
+ "Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.2.0)\n",
534
+ "Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.15.0)\n",
535
+ "Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.7.16)\n",
536
+ "Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.4.1)\n",
537
+ "Collecting docutils<0.19,>=0.14 (from sphinx->nemo_toolkit[all])\n",
538
+ " Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)\n",
539
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m575.5/575.5 kB\u001b[0m \u001b[31m34.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
540
+ "\u001b[?25hCollecting pybtex>=0.24 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
541
+ " Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)\n",
542
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m561.4/561.4 kB\u001b[0m \u001b[31m32.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
543
+ "\u001b[?25hCollecting pybtex-docutils>=1.0.0 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
544
+ " Downloading pybtex_docutils-1.0.3-py3-none-any.whl (6.4 kB)\n",
545
+ "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.64.0)\n",
546
+ "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (2.27.0)\n",
547
+ "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.2.0)\n",
548
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.6)\n",
549
+ "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (0.7.2)\n",
550
+ "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.0.3)\n",
551
+ "Collecting plac (from texterrors->nemo_toolkit[all])\n",
552
+ " Downloading plac-1.4.3-py2.py3-none-any.whl (22 kB)\n",
553
+ "Collecting loguru (from texterrors->nemo_toolkit[all])\n",
554
+ " Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n",
555
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
556
+ "\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from texterrors->nemo_toolkit[all]) (2.4.0)\n",
557
+ "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
558
+ " Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
559
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
560
+ "\u001b[?25hCollecting trampoline>=0.1.2 (from torchsde->nemo_toolkit[all])\n",
561
+ " Downloading trampoline-0.1.2-py3-none-any.whl (5.2 kB)\n",
562
+ "Collecting docker-pycreds>=0.4.0 (from wandb->nemo_toolkit[all])\n",
563
+ " Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
564
+ "Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->nemo_toolkit[all])\n",
565
+ " Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
566
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
567
+ "\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->nemo_toolkit[all]) (5.9.5)\n",
568
+ "Collecting sentry-sdk>=1.0.0 (from wandb->nemo_toolkit[all])\n",
569
+ " Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)\n",
570
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m25.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
571
+ "\u001b[?25hCollecting setproctitle (from wandb->nemo_toolkit[all])\n",
572
+ " Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
573
+ "Collecting asciitree (from zarr->nemo_toolkit[all])\n",
574
+ " Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n",
575
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
576
+ "Collecting numcodecs>=0.10.0 (from zarr->nemo_toolkit[all])\n",
577
+ " Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n",
578
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m86.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
579
+ "\u001b[?25hCollecting fasteners (from zarr->nemo_toolkit[all])\n",
580
+ " Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n",
581
+ "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.113->boto3->nemo_toolkit[all]) (2.0.7)\n",
582
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->nemo_toolkit[all]) (2.22)\n",
583
+ "Requirement already satisfied: toolz>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from cytoolz>=0.10.1->lhotse>=1.22.0->nemo_toolkit[all]) (0.12.1)\n",
584
+ "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask>=0.8->flask-restful->nemo_toolkit[all]) (2.2.0)\n",
585
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.3.1)\n",
586
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (23.2.0)\n",
587
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.4.1)\n",
588
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (6.0.5)\n",
589
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.9.4)\n",
590
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (4.0.3)\n",
591
+ "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
592
+ " Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
593
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
594
+ "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (5.3.3)\n",
595
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.4.0)\n",
596
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (4.9)\n",
597
+ "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (1.3.1)\n",
598
+ "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.1.12)\n",
599
+ "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.3.3)\n",
600
+ "Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all])\n",
601
+ " Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n",
602
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m64.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
603
+ "\u001b[?25hRequirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.5)\n",
604
+ "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (3.0.43)\n",
605
+ "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
606
+ "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.1.7)\n",
607
+ "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (4.9.0)\n",
608
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->nemo_toolkit[all]) (2.1.5)\n",
609
+ "Collecting typer>=0.12.1 (from pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
610
+ " Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
611
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
612
+ "\u001b[?25hCollecting latexcodec>=1.0.4 (from pybtex>=0.24->sphinxcontrib-bibtex->nemo_toolkit[all])\n",
613
+ " Downloading latexcodec-3.0.0-py3-none-any.whl (18 kB)\n",
614
+ "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (0.7.0)\n",
615
+ "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (2.18.2)\n",
616
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.3.2)\n",
617
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.7)\n",
618
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (2024.2.2)\n",
619
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (3.0.0)\n",
620
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->nemo_toolkit[all]) (1.3.0)\n",
621
+ "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.5)\n",
622
+ "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown->nemo_toolkit[all]) (2.5)\n",
623
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers>=0.19.3->nemo_toolkit[all]) (3.18.2)\n",
624
+ "INFO: pip is looking at multiple versions of levenshtein to determine which version is compatible with other requirements. This could take a while.\n",
625
+ "Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
626
+ " Downloading Levenshtein-0.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
627
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
628
+ "\u001b[?25h Downloading Levenshtein-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
629
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
630
+ "\u001b[?25h Downloading Levenshtein-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB)\n",
631
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.4/169.4 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
632
+ "\u001b[?25h Downloading Levenshtein-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)\n",
633
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.9/172.9 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
634
+ "\u001b[?25hRequirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (1.7.1)\n",
635
+ "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
636
+ " Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
637
+ "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
638
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (0.1.2)\n",
639
+ "Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (24.0.1)\n",
640
+ "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (23.1.0)\n",
641
+ "Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.7.2)\n",
642
+ "Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.10.4)\n",
643
+ "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.4)\n",
644
+ "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.6.0)\n",
645
+ "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.3)\n",
646
+ "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
647
+ "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.20.0)\n",
648
+ "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.0.0)\n",
649
+ "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.0)\n",
650
+ "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.6.0)\n",
651
+ "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (3.2.2)\n",
652
+ "Collecting shellingham>=1.3.0 (from typer>=0.12.1->pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
653
+ " Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
654
+ "Requirement already satisfied: jupyter-server>=1.8 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.24.0)\n",
655
+ "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.2.4)\n",
656
+ "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.1.0)\n",
657
+ "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.7.1)\n",
658
+ "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.4)\n",
659
+ "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.3.0)\n",
660
+ "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
661
+ "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.10.0)\n",
662
+ "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.5.1)\n",
663
+ "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.0)\n",
664
+ "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2.19.1)\n",
665
+ "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (4.19.2)\n",
666
+ "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (21.2.0)\n",
667
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2023.12.1)\n",
668
+ "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.35.1)\n",
669
+ "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
670
+ "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (3.7.1)\n",
671
+ "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.0)\n",
672
+ "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.5.1)\n",
673
+ "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.1)\n",
674
+ "Building wheels for collected packages: causal-conv1d, antlr4-python3-runtime, progress, clip, fasttext, kaldi-python-io, nemo_toolkit, rouge-score, sox, distance, docopt, intervaltree, asciitree, cdifflib\n",
675
+ " Building wheel for causal-conv1d (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
676
+ " Created wheel for causal-conv1d: filename=causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl size=103643300 sha256=2bba8823ae89bd79c2d067978e0e533fab8298f69855bfc5d199828b278cf66c\n",
677
+ " Stored in directory: /root/.cache/pip/wheels/22/a7/db/0c9482dec3707ad23181b0eb2da40e4b8f26aaed49752fc49f\n",
678
+ " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
679
+ " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=5ba620ca9da88d714c879b4a21820b9bdebd36fb76051b0b48a375e2e4f0fcb7\n",
680
+ " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
681
+ " Building wheel for progress (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
682
+ " Created wheel for progress: filename=progress-1.6-py3-none-any.whl size=9614 sha256=8102705b8ef612530f059a82dde5ea899c85e387fb8c5e956ed0fef5f2929103\n",
683
+ " Stored in directory: /root/.cache/pip/wheels/a2/68/5f/c339b20a41659d856c93ccdce6a33095493eb82c3964aac5a1\n",
684
+ " Building wheel for clip (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
685
+ " Created wheel for clip: filename=clip-0.2.0-py3-none-any.whl size=6989 sha256=d8ab11e4cbc0837cde86e4c7011ffabab187b9937f98e39480bb87ec75a34740\n",
686
+ " Stored in directory: /root/.cache/pip/wheels/7f/5c/e6/2c0fdb453a3569188864b17e9676bea8b3b7e160c037117869\n",
687
+ " Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
688
+ " Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4227136 sha256=71dc3f2989afb1a6f206ee64ae86bfcfa8381c66960e93ac984be24f2871c66b\n",
689
+ " Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394\n",
690
+ " Building wheel for kaldi-python-io (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
691
+ " Created wheel for kaldi-python-io: filename=kaldi_python_io-1.2.2-py3-none-any.whl size=8949 sha256=959e8f93e517267e62e51f1e26455214c6b2aba320bb5621fa506730d4ad2ceb\n",
692
+ " Stored in directory: /root/.cache/pip/wheels/b7/23/5f/49d3a826be576faf61d84e8028e1914bb36a5586ee2613b087\n",
693
+ " Building wheel for nemo_toolkit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
694
+ " Created wheel for nemo_toolkit: filename=nemo_toolkit-2.0.0rc1-py3-none-any.whl size=3709778 sha256=458c9cb158a12a8ddc8c570fde72f15111afe5767ac2a0e485966d2d76e1bda8\n",
695
+ " Stored in directory: /tmp/pip-ephem-wheel-cache-992hxcpb/wheels/c3/4e/45/ab3d29aa73df619f27b371cacf809d5330a18f794879163c1b\n",
696
+ " Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
697
+ " Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=076fda87c1a21e7a9fe88f0b3b9a26f7b76171063d76812353b4a30ebe02da51\n",
698
+ " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
699
+ " Building wheel for sox (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
700
+ " Created wheel for sox: filename=sox-1.5.0-py3-none-any.whl size=40038 sha256=717f5186772b8ce84cbbf7b1a01931be688fc591982575b518b2bc327460675d\n",
701
+ " Stored in directory: /root/.cache/pip/wheels/74/e7/7b/8033be3ec5e4994595d01269fc9657c8fd83a0dcbf8536666a\n",
702
+ " Building wheel for distance (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
703
+ " Created wheel for distance: filename=Distance-0.1.3-py3-none-any.whl size=16258 sha256=4863022ee11d6ede70f4b4362c6554629a2e734cb7a0a0212904aeafac36f78e\n",
704
+ " Stored in directory: /root/.cache/pip/wheels/e8/bb/de/f71bf63559ea9a921059a5405806f7ff6ed612a9231c4a9309\n",
705
+ " Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
706
+ " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=80b5355530de1ea759d79fc19047cdd59679e6b0a014b51dbca811111b1aad36\n",
707
+ " Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n",
708
+ " Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
709
+ " Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26096 sha256=e831b80cc0232f925c293997e7be035697c58f5a834060f1b1f6a097fa5502b7\n",
710
+ " Stored in directory: /root/.cache/pip/wheels/fa/80/8c/43488a924a046b733b64de3fac99252674c892a4c3801c0a61\n",
711
+ " Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
712
+ " Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=483b21d8a257179dcd00b430bc43c6fce9f97d0442b44433ac2794d3030a48e1\n",
713
+ " Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n",
714
+ " Building wheel for cdifflib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
715
+ " Created wheel for cdifflib: filename=cdifflib-1.2.6-cp310-cp310-linux_x86_64.whl size=27681 sha256=a4929dc925e36d0e71a89e124ce85f0ada2ec5862708eb18c9136da35649ebc1\n",
716
+ " Stored in directory: /root/.cache/pip/wheels/87/a7/fd/8061e24ed08689045cb6d1ca303768dc463b20a5a338174841\n",
717
+ "Successfully built causal-conv1d antlr4-python3-runtime progress clip fasttext kaldi-python-io nemo_toolkit rouge-score sox distance docopt intervaltree asciitree cdifflib\n",
718
+ "Installing collected packages: trampoline, pydub, progress, plac, pangu, opencc, ninja, ijson, docopt, distance, clip, braceexpand, asciitree, antlr4-python3-runtime, aniso8601, addict, xxhash, webdataset, trimesh, textdistance, sox, smmap, shellingham, setproctitle, sentry-sdk, ruamel.yaml.clib, rapidfuzz, pytest-runner, pypinyin, pynini, pybind11, portalocker, pathspec, parameterized, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numcodecs, mypy-extensions, marshmallow, markdown2, loguru, lilcom, lightning-utilities, libcst, latexcodec, kornia-rs, kaldiio, kaldi-python-io, jmespath, jedi, isort, intervaltree, ftfy, fasteners, faiss-cpu, einops, docutils, docker-pycreds, dill, decord, cytoolz, colorama, click, cdifflib, attrdict, zarr, sacremoses, sacrebleu, ruamel.yaml, resampy, pytest-mock, pypinyin-dict, PyMCubes, pyloudnorm, pybtex, pyannote.core, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, Levenshtein, jiwer, hydra-core, gitdb, fiddle, fasttext, einops-exts, botocore, black, typer, texterrors, s3transfer, rouge-score, pybtex-docutils, nvidia-cusolver-cu12, lhotse, gitpython, flask-restful, diffusers, wandb, transformers, sphinxcontrib-bibtex, pyannote.database, g2p-en, datasets, boto3, torchsde, torchmetrics, torchdiffeq, sentence-transformers, pyannote.metrics, nerfacc, nemo_toolkit, nemo-text-processing, kornia, causal-conv1d, accelerated-scan, timm, pytorch-lightning, taming-transformers, open-clip-torch\n",
719
+ " Attempting uninstall: docutils\n",
720
+ " Found existing installation: docutils 0.18.1\n",
721
+ " Uninstalling docutils-0.18.1:\n",
722
+ " Successfully uninstalled docutils-0.18.1\n",
723
+ " Attempting uninstall: click\n",
724
+ " Found existing installation: click 8.1.7\n",
725
+ " Uninstalling click-8.1.7:\n",
726
+ " Successfully uninstalled click-8.1.7\n",
727
+ " Attempting uninstall: typer\n",
728
+ " Found existing installation: typer 0.9.4\n",
729
+ " Uninstalling typer-0.9.4:\n",
730
+ " Successfully uninstalled typer-0.9.4\n",
731
+ " Attempting uninstall: transformers\n",
732
+ " Found existing installation: transformers 4.41.0\n",
733
+ " Uninstalling transformers-4.41.0:\n",
734
+ " Successfully uninstalled transformers-4.41.0\n",
735
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
736
+ "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
737
+ "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
738
+ "\u001b[0mSuccessfully installed Levenshtein-0.22.0 PyMCubes-0.1.4 accelerated-scan-0.2.0 addict-2.4.0 aniso8601-9.0.1 antlr4-python3-runtime-4.9.3 asciitree-0.3.3 attrdict-2.0.1 black-24.4.2 boto3-1.34.113 botocore-1.34.113 braceexpand-0.1.7 causal-conv1d-1.2.2.post1 cdifflib-1.2.6 click-8.0.2 clip-0.2.0 colorama-0.4.6 cytoolz-0.12.3 datasets-2.19.1 decord-0.6.0 diffusers-0.28.0 dill-0.3.8 distance-0.1.3 docker-pycreds-0.4.0 docopt-0.6.2 docutils-0.17.1 einops-0.8.0 einops-exts-0.0.4 faiss-cpu-1.8.0 fasteners-0.19 fasttext-0.9.2 fiddle-0.3.0 flask-restful-0.3.10 ftfy-6.2.0 g2p-en-2.1.0 gitdb-4.0.11 gitpython-3.1.43 hydra-core-1.3.2 ijson-3.2.3 intervaltree-3.1.0 isort-5.13.2 jedi-0.19.1 jiwer-2.5.2 jmespath-1.0.1 kaldi-python-io-1.2.2 kaldiio-2.18.0 kornia-0.7.2 kornia-rs-0.1.3 latexcodec-3.0.0 lhotse-1.23.0 libcst-1.4.0 lightning-utilities-0.11.2 lilcom-1.7 loguru-0.7.2 markdown2-2.4.13 marshmallow-3.21.2 multiprocess-0.70.16 mypy-extensions-1.0.0 nemo-text-processing-1.0.2 nemo_toolkit-2.0.0rc1 nerfacc-0.5.3 ninja-1.11.1.1 numcodecs-0.12.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 open-clip-torch-2.24.0 opencc-1.1.6 pangu-4.0.6.1 parameterized-0.9.0 pathspec-0.12.1 plac-1.4.3 portalocker-2.8.2 progress-1.6 pyannote.core-5.0.0 pyannote.database-5.1.0 pyannote.metrics-3.2.1 pybind11-2.12.0 pybtex-0.24.0 pybtex-docutils-1.0.3 pydub-0.25.1 pyloudnorm-0.1.1 pynini-2.1.5 pypinyin-0.51.0 pypinyin-dict-0.8.0 pytest-mock-3.14.0 pytest-runner-6.0.1 pytorch-lightning-2.2.5 rapidfuzz-2.13.7 resampy-0.4.3 rouge-score-0.1.2 ruamel.yaml-0.18.6 ruamel.yaml.clib-0.2.8 s3transfer-0.10.1 sacrebleu-2.4.2 sacremoses-0.1.1 
sentence-transformers-2.7.0 sentry-sdk-2.3.1 setproctitle-1.3.3 shellingham-1.5.4 smmap-5.0.1 sox-1.5.0 sphinxcontrib-bibtex-2.6.2 taming-transformers-0.0.1 textdistance-4.6.2 texterrors-0.4.4 timm-1.0.3 torchdiffeq-0.2.3 torchmetrics-1.4.0.post0 torchsde-0.2.6 trampoline-0.1.2 transformers-4.40.2 trimesh-4.4.0 typer-0.12.3 wandb-0.17.0 webdataset-0.2.86 xxhash-3.4.1 zarr-2.18.2\n"
739
+ ]
740
+ }
741
+ ],
742
+ "source": [
743
+ "!pip install wget\n",
744
+ "!apt-get install sox libsndfile1 ffmpeg\n",
745
+ "\n",
746
+ "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
747
+ ]
748
+ },
749
+ {
750
+ "cell_type": "code",
751
+ "source": [
752
+ "import hydra\n",
753
+ "import soundfile as sf\n",
754
+ "import torch\n",
755
+ "from omegaconf import OmegaConf"
756
+ ],
757
+ "metadata": {
758
+ "id": "cBz_fQ6KbzrZ"
759
+ },
760
+ "execution_count": null,
761
+ "outputs": []
762
+ },
763
+ {
764
+ "cell_type": "markdown",
765
+ "source": [
766
+ "### Downloading config, weights and audio example"
767
+ ],
768
+ "metadata": {
769
+ "id": "3Fvy0phvhr0G"
770
+ }
771
+ },
772
+ {
773
+ "cell_type": "code",
774
+ "source": [
775
+ "import locale\n",
776
+ "\n",
777
+ "locale.getpreferredencoding = lambda: \"UTF-8\"\n",
778
+ "\n",
779
+ "# Loading weights, config and example wav for CTC-model\n",
780
+ "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt\n",
781
+ "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml\n",
782
+ "!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav"
783
+ ],
784
+ "metadata": {
785
+ "colab": {
786
+ "base_uri": "https://localhost:8080/"
787
+ },
788
+ "id": "0EHgk_I6hrGI",
789
+ "outputId": "4c9ac38d-eeca-4da4-af26-aa41becfed00"
790
+ },
791
+ "execution_count": null,
792
+ "outputs": [
793
+ {
794
+ "output_type": "stream",
795
+ "name": "stdout",
796
+ "text": [
797
+ "--2024-05-28 07:12:41-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav\n",
798
+ "Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
799
+ "Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
800
+ "HTTP request sent, awaiting response... 200 OK\n",
801
+ "Length: 361324 (353K) [application/octet-stream]\n",
802
+ "Saving to: ‘example.wav’\n",
803
+ "\n",
804
+ "example.wav 100%[===================>] 352.86K 583KB/s in 0.6s \n",
805
+ "\n",
806
+ "2024-05-28 07:12:42 (583 KB/s) - ‘example.wav’ saved [361324/361324]\n",
807
+ "\n"
808
+ ]
809
+ }
810
+ ]
811
+ },
812
+ {
813
+ "cell_type": "markdown",
814
+ "source": [
815
+ "### Model instantiating and *inference*"
816
+ ],
817
+ "metadata": {
818
+ "id": "FUA6Ah1blyHv"
819
+ }
820
+ },
821
+ {
822
+ "cell_type": "code",
823
+ "source": [
824
+ "class SpecScaler(torch.nn.Module):\n",
825
+ " def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
826
+ " return torch.log(x.clamp_(1e-9, 1e9))\n",
827
+ "\n",
828
+ "\n",
829
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
830
+ "encoder_config = \"encoder_config.yaml\"\n",
831
+ "model_weights = \"ssl_model_weights.ckpt\"\n",
832
+ "audio_path = \"example.wav\"\n",
833
+ "\n",
834
+ "conf = OmegaConf.load(encoder_config)\n",
835
+ "\n",
836
+ "encoder = hydra.utils.instantiate(conf.encoder)\n",
837
+ "ckpt = torch.load(model_weights, map_location=\"cpu\")\n",
838
+ "encoder.load_state_dict(ckpt, strict=True)\n",
839
+ "encoder.to(device)\n",
840
+ "\n",
841
+ "feature_extractor = hydra.utils.instantiate(conf.feature_extractor)\n",
842
+ "\n",
843
+ "audio_signal, _ = sf.read(audio_path, dtype=\"float32\")\n",
844
+ "features = feature_extractor(torch.tensor(audio_signal).float())\n",
845
+ "features = features.to(device)\n",
846
+ "\n",
847
+ "encoded, _ = encoder.forward(\n",
848
+ " audio_signal=features.unsqueeze(0),\n",
849
+ " length=torch.tensor([features.shape[-1]]).to(device),\n",
850
+ ")\n",
851
+ "print(f\"encoded signal shape: {encoded.shape}\")"
852
+ ],
853
+ "metadata": {
854
+ "colab": {
855
+ "base_uri": "https://localhost:8080/"
856
+ },
857
+ "id": "AsUapeJKh3cz",
858
+ "outputId": "ee5bc82b-4526-4364-ef7c-decd59cdbc5f"
859
+ },
860
+ "execution_count": null,
861
+ "outputs": [
862
+ {
863
+ "output_type": "stream",
864
+ "name": "stdout",
865
+ "text": [
866
+ "encoded signal shape: torch.Size([1, 768, 283])\n"
867
+ ]
868
+ }
869
+ ]
870
+ },
871
+ {
872
+ "cell_type": "code",
873
+ "source": [],
874
+ "metadata": {
875
+ "id": "p1yWHEU5Dn60"
876
+ },
877
+ "execution_count": null,
878
+ "outputs": []
879
+ }
880
+ ]
881
+ }
Examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Examples/rnnt_inference.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ import torch
4
+ import torchaudio
5
+ from nemo.collections.asr.models import EncDecRNNTBPEModel
6
+ from nemo.collections.asr.modules.audio_preprocessing import (
7
+ AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
8
+ )
9
+ from nemo.collections.asr.parts.preprocessing.features import (
10
+ FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
11
+ )
12
+ from omegaconf import OmegaConf, open_dict
13
+
14
+
15
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo torchaudio featurizer with a configurable mel scale.

    ``window_size`` and ``window_stride`` are discarded, the remaining keyword
    arguments are forwarded to the parent constructor, and then a
    ``torchaudio.transforms.MelSpectrogram`` built from those same arguments
    is stored in ``self._mel_spec_extractor``.

    Args:
        mel_scale: value passed to ``MelSpectrogram(mel_scale=...)``.
        wkwargs: value passed to ``MelSpectrogram(wkwargs=...)``.
        **kwargs: parent constructor arguments; ``nfilt``, ``window``,
            ``mel_norm`` and ``n_fft`` must be present (``KeyError`` otherwise).
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # These two keys are not forwarded to the parent constructor.
        for dropped_key in ("window_size", "window_stride"):
            kwargs.pop(dropped_key, None)

        super().__init__(**kwargs)

        # _sample_rate / win_length / hop_length / torch_windows are not set in
        # this class — presumably provided by the parent constructor.
        extractor_options = dict(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(
            **extractor_options
        )
37
+
38
+
39
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses the local ``FilterbankFeaturesTA``.

    Args:
        mel_scale: forwarded to ``FilterbankFeaturesTA``.
        **kwargs: forwarded to the parent constructor; must contain
            ``features`` (``KeyError`` otherwise), which is handed to the
            featurizer under the name ``nfilt``.
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)
        # The featurizer takes this value as `nfilt` rather than `features`.
        kwargs["nfilt"] = kwargs.pop("features")
        # Deprecated arguments; kept for config compatibility
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
50
+
51
+
52
+ def _parse_args():
53
+ parser = argparse.ArgumentParser(
54
+ description="Run inference using GigaAM-RNNT checkpoint"
55
+ )
56
+ parser.add_argument(
57
+ "--model_config", help="Path to GigaAM-RNNT config file (.yaml)"
58
+ )
59
+ parser.add_argument(
60
+ "--model_weights", help="Path to GigaAM-RNNT checkpoint file (.ckpt)"
61
+ )
62
+ parser.add_argument("--tokenizer_path", help="Path to tokenizer directory")
63
+ parser.add_argument("--audio_path", help="Path to audio signal")
64
+ parser.add_argument("--device", help="Device: cpu / cuda")
65
+ return parser.parse_args()
66
+
67
+
68
def main(
    model_config: str,
    model_weights: str,
    tokenizer_path: str,
    device: str,
    audio_path: str,
):
    """Build a GigaAM-RNNT model, load its weights and print one transcription.

    Args:
        model_config: path to the model YAML config.
        model_weights: path to the checkpoint given to ``torch.load``.
        tokenizer_path: directory written into ``config.tokenizer.dir``.
        device: device the model is moved to.
        audio_path: audio file passed to ``model.transcribe``.
    """
    config = OmegaConf.load(model_config)
    # The tokenizer directory is injected at runtime rather than read from the YAML.
    with open_dict(config):
        config.tokenizer.dir = tokenizer_path

    asr_model = EncDecRNNTBPEModel.from_config_dict(config)

    # NOTE(review): torch.load unpickles the file — only use trusted checkpoints.
    state_dict = torch.load(model_weights, map_location="cpu")
    # strict=False: mismatched keys between checkpoint and model do not raise.
    asr_model.load_state_dict(state_dict, strict=False)
    asr_model = asr_model.to(device)
    asr_model.eval()

    hypotheses = asr_model.transcribe([audio_path])
    transcription = hypotheses[0][0]
    print(f"transcription: {transcription}")
88
+
89
+
90
if __name__ == "__main__":
    # Script entry point: read the CLI flags and hand each one to main() by name.
    cli_args = _parse_args()
    main(
        model_config=cli_args.model_config,
        model_weights=cli_args.model_weights,
        tokenizer_path=cli_args.tokenizer_path,
        device=cli_args.device,
        audio_path=cli_args.audio_path,
    )
Examples/rnnt_longform_inference.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from io import BytesIO
3
+ from typing import List, Tuple
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torchaudio
8
+ from nemo.collections.asr.models import EncDecRNNTBPEModel
9
+ from nemo.collections.asr.modules.audio_preprocessing import (
10
+ AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
11
+ )
12
+ from nemo.collections.asr.parts.preprocessing.features import (
13
+ FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
14
+ )
15
+ from omegaconf import OmegaConf, open_dict
16
+ from pyannote.audio import Pipeline
17
+ from pydub import AudioSegment
18
+
19
+
20
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo torchaudio featurizer with a configurable mel scale.

    ``window_size`` and ``window_stride`` are discarded, the remaining keyword
    arguments are forwarded to the parent constructor, and then a
    ``torchaudio.transforms.MelSpectrogram`` built from those same arguments
    is stored in ``self._mel_spec_extractor``.

    Args:
        mel_scale: value passed to ``MelSpectrogram(mel_scale=...)``.
        wkwargs: value passed to ``MelSpectrogram(wkwargs=...)``.
        **kwargs: parent constructor arguments; ``nfilt``, ``window``,
            ``mel_norm`` and ``n_fft`` must be present (``KeyError`` otherwise).
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # These two keys are not forwarded to the parent constructor.
        for dropped_key in ("window_size", "window_stride"):
            kwargs.pop(dropped_key, None)

        super().__init__(**kwargs)

        # _sample_rate / win_length / hop_length / torch_windows are not set in
        # this class — presumably provided by the parent constructor.
        extractor_options = dict(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(
            **extractor_options
        )
42
+
43
+
44
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses the local ``FilterbankFeaturesTA``.

    Args:
        mel_scale: forwarded to ``FilterbankFeaturesTA``.
        **kwargs: forwarded to the parent constructor; must contain
            ``features`` (``KeyError`` otherwise), which is handed to the
            featurizer under the name ``nfilt``.
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)
        # The featurizer takes this value as `nfilt` rather than `features`.
        kwargs["nfilt"] = kwargs.pop("features")
        # Deprecated arguments; kept for config compatibility
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
55
+
56
+
57
def audiosegment_to_numpy(audiosegment: AudioSegment) -> np.ndarray:
    """Convert a pydub ``AudioSegment`` into a float32 array scaled to [-1, 1).

    Args:
        audiosegment: object exposing ``get_array_of_samples()``, ``channels``
            and, optionally, ``sample_width`` (bytes per sample), as pydub's
            ``AudioSegment`` does.

    Returns:
        C-ordered float32 array: 1-D for mono input, ``(frames, channels)``
        for multi-channel input.
    """
    samples = np.array(audiosegment.get_array_of_samples())
    channels = audiosegment.channels
    if channels > 1:
        # Samples arrive interleaved frame by frame; one column per channel.
        samples = samples.reshape((-1, channels))

    # Normalise by the full-scale value of a signed sample of this width
    # (32768 for 16-bit audio). Objects without `sample_width` are treated as
    # 16-bit, which is what the previous hard-coded divisor assumed.
    sample_width = getattr(audiosegment, "sample_width", 2)
    full_scale = float(1 << (8 * sample_width - 1))
    return samples.astype(np.float32, order="C") / full_scale
65
+
66
+
67
def format_time(seconds: float) -> str:
    """Format a duration in seconds as ``[HH:]MM:SS:CC``.

    ``CC`` is hundredths of a second. The hour field is emitted only when the
    duration is at least one hour.

    Args:
        seconds: non-negative duration in seconds.

    Returns:
        The formatted timestamp, every field zero-padded to two digits.
    """
    # Work in whole hundredths so binary float error cannot truncate a digit
    # (e.g. 2.3 s used to come out as ":29" instead of ":30").
    total_hundredths = int(round(seconds * 100))
    hours, remainder = divmod(total_hundredths, 3600 * 100)
    minutes, remainder = divmod(remainder, 60 * 100)
    full_seconds, hundredths = divmod(remainder, 100)

    if hours > 0:
        return f"{hours:02}:{minutes:02}:{full_seconds:02}:{hundredths:02}"
    return f"{minutes:02}:{full_seconds:02}:{hundredths:02}"
78
+
79
+
80
def segment_audio(
    audio_path: str,
    pipeline: Pipeline,
    max_duration: float = 22.0,
    min_duration: float = 15.0,
    new_chunk_threshold: float = 0.2,
) -> Tuple[List[np.ndarray], List[List[float]]]:
    """Split a WAV file into speech chunks for ASR using a VAD pipeline.

    Speech regions returned by ``pipeline`` are merged greedily into chunks.
    A chunk is closed when it is already longer than ``min_duration`` and the
    next region starts more than ``new_chunk_threshold`` seconds after the
    previous one ended, or when adding the next region would push it past
    ``max_duration``.

    Args:
        audio_path: path to a WAV file.
        pipeline: callable taking ``{"uri": ..., "audio": file-like}`` whose
            result provides ``get_timeline().support()``.
        max_duration: chunk length in seconds that triggers a forced split.
        min_duration: minimum chunk length in seconds before a pause may split.
        new_chunk_threshold: pause length in seconds that allows a split.

    Returns:
        ``(segments, boundaries)``: the audio of each chunk as a numpy array
        and the matching ``[start, end]`` times in seconds.
    """
    # Prepare audio for pyannote vad pipeline
    audio = AudioSegment.from_wav(audio_path)
    audio_bytes = BytesIO()
    audio.export(audio_bytes, format="wav")
    audio_bytes.seek(0)

    # Process audio with pipeline to obtain segments with speech activity
    sad_segments = pipeline({"uri": "filename", "audio": audio_bytes})

    segments = []
    boundaries = []
    curr_duration = 0
    curr_start = 0
    curr_end = 0

    # Concat segments from pipeline into chunks for asr according to max/min duration
    for segment in sad_segments.get_timeline().support():
        start = max(0, segment.start)
        # The audio length is divided by 1000 here and slice bounds are
        # multiplied by 1000 below: times are kept in seconds, slicing is in ms.
        end = min(len(audio) / 1000, segment.end)
        if (
            curr_duration > min_duration and start - curr_end > new_chunk_threshold
        ) or (curr_duration + (end - curr_end) > max_duration):
            # Nothing is accumulated yet when the very first speech region
            # already ends past max_duration; flushing then would emit an
            # empty chunk with boundary [0, 0].
            if curr_duration > 0:
                audio_segment = audiosegment_to_numpy(
                    audio[curr_start * 1000 : curr_end * 1000]
                )
                segments.append(audio_segment)
                boundaries.append([curr_start, curr_end])
            curr_start = start

        curr_end = end
        curr_duration = curr_end - curr_start

    # Flush whatever is still accumulated after the last speech region.
    if curr_duration != 0:
        audio_segment = audiosegment_to_numpy(
            audio[curr_start * 1000 : curr_end * 1000]
        )
        segments.append(audio_segment)
        boundaries.append([curr_start, curr_end])

    return segments, boundaries
127
+
128
+
129
+ def _parse_args():
130
+ parser = argparse.ArgumentParser(
131
+ description="Run long-form inference using GigaAM-RNNT checkpoint"
132
+ )
133
+ parser.add_argument(
134
+ "--model_config", help="Path to GigaAM-RNNT config file (.yaml)"
135
+ )
136
+ parser.add_argument(
137
+ "--model_weights", help="Path to GigaAM-RNNT checkpoint file (.ckpt)"
138
+ )
139
+ parser.add_argument("--tokenizer_path", help="Path to tokenizer directory")
140
+ parser.add_argument("--audio_path", help="Path to audio signal")
141
+ parser.add_argument(
142
+ "--hf_token", help="HuggingFace token for using pyannote Pipeline"
143
+ )
144
+ parser.add_argument("--device", help="Device: cpu / cuda")
145
+ parser.add_argument("--fp16", help="Run in FP16 mode", default=True)
146
+ parser.add_argument(
147
+ "--batch_size", help="Batch size for acoustic model inference", default=10
148
+ )
149
+ return parser.parse_args()
150
+
151
+
152
def main(
    model_config: str,
    model_weights: str,
    tokenizer_path: str,
    device: str,
    audio_path: str,
    hf_token: str,
    fp16: bool,
    batch_size: int = 10,
):
    """Transcribe a long recording chunk by chunk and print timestamped text.

    Args:
        model_config: path to the model YAML config.
        model_weights: path to the checkpoint given to ``torch.load``.
        tokenizer_path: directory written into ``config.tokenizer.dir``.
        device: device for both the ASR model and the VAD pipeline.
        audio_path: WAV file to transcribe.
        hf_token: HuggingFace token used to fetch the pyannote pipeline.
        fp16: run the model in half precision when ``device`` is not ``"cpu"``.
        batch_size: batch size passed to ``model.transcribe``.
    """
    # Half precision is only used off-CPU.
    use_half = device != "cpu" and fp16

    # Initialize model
    config = OmegaConf.load(model_config)
    with open_dict(config):
        config.tokenizer.dir = tokenizer_path

    model = EncDecRNNTBPEModel.from_config_dict(config)
    # NOTE(review): torch.load unpickles the file — only use trusted checkpoints.
    state_dict = torch.load(model_weights, map_location="cpu")
    model.load_state_dict(state_dict, strict=False)
    model = model.to(device)
    if use_half:
        model = model.half()
        # The preprocessor is cast back to float32 after the model-wide half().
        model.preprocessor = model.preprocessor.float()
    model.eval()

    # Initialize pyannote pipeline
    vad_pipeline = Pipeline.from_pretrained(
        "pyannote/voice-activity-detection", use_auth_token=hf_token
    )
    vad_pipeline = vad_pipeline.to(torch.device(device))

    # Segment audio
    segments, boundaries = segment_audio(audio_path, vad_pipeline)

    # Transcribe segments
    if use_half:
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            hypotheses = model.transcribe(segments, batch_size=batch_size)
    else:
        hypotheses = model.transcribe(segments, batch_size=batch_size)
    transcriptions = hypotheses[0]

    for transcription, boundary in zip(transcriptions, boundaries):
        print(
            f"[{format_time(boundary[0])} - {format_time(boundary[1])}]: {transcription}\n"
        )
197
+
198
+
199
if __name__ == "__main__":
    # Script entry point: read the CLI flags and hand each one to main() by name.
    cli_args = _parse_args()
    main(
        model_config=cli_args.model_config,
        model_weights=cli_args.model_weights,
        tokenizer_path=cli_args.tokenizer_path,
        device=cli_args.device,
        audio_path=cli_args.audio_path,
        hf_token=cli_args.hf_token,
        fp16=cli_args.fp16,
        batch_size=cli_args.batch_size,
    )
Examples/ssl_inference.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ import hydra
4
+ import soundfile
5
+ import torch
6
+ from omegaconf import OmegaConf
7
+
8
+
9
class SpecScaler(torch.nn.Module):
    """Clamp a tensor to [1e-9, 1e9] and return its natural logarithm."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``log(x)`` after clamping; ``x`` itself is clamped in place."""
        # In-place clamp: the caller's tensor is modified, no copy is made.
        x.clamp_(min=1e-9, max=1e9)
        return x.log()
12
+
13
+
14
+ def _parse_args():
15
+ parser = argparse.ArgumentParser(
16
+ description="Run inference using GigaAM checkpoint"
17
+ )
18
+ parser.add_argument("--encoder_config", help="Path to GigaAM config file (.yaml)")
19
+ parser.add_argument(
20
+ "--model_weights", help="Path to GigaAM checkpoint file (.ckpt)"
21
+ )
22
+ parser.add_argument("--audio_path", help="Path to audio signal")
23
+ parser.add_argument("--device", help="Device: cpu / cuda")
24
+ return parser.parse_args()
25
+
26
+
27
def main(encoder_config: str, model_weights: str, device: str, audio_path: str):
    """Encode one audio file with the GigaAM encoder and print the output shape.

    Args:
        encoder_config: YAML config with ``encoder`` and ``feature_extractor``
            sections that hydra can instantiate.
        model_weights: path to the encoder checkpoint given to ``torch.load``.
        device: device the encoder and the features are moved to.
        audio_path: audio file read with ``soundfile``.
    """
    conf = OmegaConf.load(encoder_config)

    encoder = hydra.utils.instantiate(conf.encoder)
    # NOTE(review): torch.load unpickles the file — only use trusted checkpoints.
    ckpt = torch.load(model_weights, map_location="cpu")
    encoder.load_state_dict(ckpt, strict=True)
    encoder.to(device)
    # A freshly built module starts in training mode, where layers such as
    # dropout stay active; switch to eval so inference is deterministic.
    encoder.eval()

    feature_extractor = hydra.utils.instantiate(conf.feature_extractor)

    audio_signal, _ = soundfile.read(audio_path, dtype="float32")

    # Inference only: skip autograd bookkeeping for features and encoder pass.
    with torch.no_grad():
        features = feature_extractor(torch.tensor(audio_signal).float())
        features = features.to(device)

        encoded, _ = encoder.forward(
            # unsqueeze(0) adds the leading batch dimension for a single clip.
            audio_signal=features.unsqueeze(0),
            length=torch.tensor([features.shape[-1]]).to(device),
        )
    print(f"encoded signal shape: {encoded.shape}")
46
+
47
+
48
if __name__ == "__main__":
    # Script entry point: read the CLI flags and hand each one to main() by name.
    cli_args = _parse_args()
    main(
        encoder_config=cli_args.encoder_config,
        model_weights=cli_args.model_weights,
        device=cli_args.device,
        audio_path=cli_args.audio_path,
    )
GigaAM-CTC/ctc_model_config.yaml ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_class: enc_dec_ctc_char
2
+ sample_rate: 16000
3
+ log_prediction: true
4
+ ctc_reduction: mean_batch
5
+ labels:
6
+ - ' '
7
+ - а
8
+ - б
9
+ - в
10
+ - г
11
+ - д
12
+ - е
13
+ - ж
14
+ - з
15
+ - и
16
+ - й
17
+ - к
18
+ - л
19
+ - м
20
+ - н
21
+ - о
22
+ - п
23
+ - р
24
+ - с
25
+ - т
26
+ - у
27
+ - ф
28
+ - х
29
+ - ц
30
+ - ч
31
+ - ш
32
+ - щ
33
+ - ъ
34
+ - ы
35
+ - ь
36
+ - э
37
+ - ю
38
+ - я
39
+
40
+ preprocessor:
41
+ _target_: __main__.AudioToMelSpectrogramPreprocessor
42
+ sample_rate: 16000
43
+ n_fft: 400
44
+ n_window_size: 400
45
+ window_size: null
46
+ n_window_stride: 160
47
+ window_stride: null
48
+ features: 64
49
+ dither: 0.0
50
+ preemph: null
51
+ log: true
52
+ log_zero_guard_type: clamp
53
+ normalize: null
54
+ pad_to: 0
55
+ mel_norm: null
56
+ window: hann
57
+ log_zero_guard_value: 1e-9
58
+
59
+ train_ds:
60
+ batch_size: 10
61
+ trim_silence: false
62
+ max_duration: 25.0
63
+ min_duration: 0.1
64
+ shuffle: true
65
+ is_tarred: false
66
+ num_workers: 8
67
+ pin_memory: true
68
+ manifest_filepath: null
69
+ labels:
70
+ - ' '
71
+ - а
72
+ - б
73
+ - в
74
+ - г
75
+ - д
76
+ - е
77
+ - ж
78
+ - з
79
+ - и
80
+ - й
81
+ - к
82
+ - л
83
+ - м
84
+ - н
85
+ - о
86
+ - п
87
+ - р
88
+ - с
89
+ - т
90
+ - у
91
+ - ф
92
+ - х
93
+ - ц
94
+ - ч
95
+ - ш
96
+ - щ
97
+ - ъ
98
+ - ы
99
+ - ь
100
+ - э
101
+ - ю
102
+ - я
103
+
104
+ validation_ds:
105
+ batch_size: 20
106
+ shuffle: false
107
+ num_workers: 4
108
+ min_duration: 0.1
109
+ pin_memory: true
110
+ manifest_filepath: null
111
+ labels:
112
+ - ' '
113
+ - а
114
+ - б
115
+ - в
116
+ - г
117
+ - д
118
+ - е
119
+ - ж
120
+ - з
121
+ - и
122
+ - й
123
+ - к
124
+ - л
125
+ - м
126
+ - н
127
+ - о
128
+ - п
129
+ - р
130
+ - с
131
+ - т
132
+ - у
133
+ - ф
134
+ - х
135
+ - ц
136
+ - ч
137
+ - ш
138
+ - щ
139
+ - ъ
140
+ - ы
141
+ - ь
142
+ - э
143
+ - ю
144
+ - я
145
+
146
+ test_ds:
147
+ manifest_filepath: null
148
+ batch_size: 100
149
+ shuffle: false
150
+ num_workers: 4
151
+ pin_memory: true
152
+ labels:
153
+ - ' '
154
+ - а
155
+ - б
156
+ - в
157
+ - г
158
+ - д
159
+ - е
160
+ - ж
161
+ - з
162
+ - и
163
+ - й
164
+ - к
165
+ - л
166
+ - м
167
+ - н
168
+ - о
169
+ - п
170
+ - р
171
+ - с
172
+ - т
173
+ - у
174
+ - ф
175
+ - х
176
+ - ц
177
+ - ч
178
+ - ш
179
+ - щ
180
+ - ъ
181
+ - ы
182
+ - ь
183
+ - э
184
+ - ю
185
+ - я
186
+ spec_augment:
187
+ _target_: nemo.collections.asr.modules.SpectrogramAugmentation
188
+ freq_masks: 2
189
+ time_masks: 10
190
+ freq_width: 27
191
+ time_width: 0.05
192
+ encoder:
193
+ _target_: nemo.collections.asr.modules.ConformerEncoder
194
+ feat_in: 64
195
+ feat_out: -1
196
+ n_layers: 16
197
+ d_model: 768
198
+ subsampling: striding
199
+ subsampling_factor: 4
200
+ subsampling_conv_channels: 768
201
+ ff_expansion_factor: 4
202
+ self_attention_model: rel_pos
203
+ pos_emb_max_len: 5000
204
+ n_heads: 16
205
+ xscaling: false
206
+ untie_biases: true
207
+ conv_kernel_size: 31
208
+ dropout: 0.1
209
+ dropout_emb: 0.1
210
+ dropout_att: 0.1
211
+ decoder:
212
+ _target_: nemo.collections.asr.modules.ConvASRDecoder
213
+ feat_in: 768
214
+ num_classes: 33
215
+ vocabulary:
216
+ - ' '
217
+ - а
218
+ - б
219
+ - в
220
+ - г
221
+ - д
222
+ - е
223
+ - ж
224
+ - з
225
+ - и
226
+ - й
227
+ - к
228
+ - л
229
+ - м
230
+ - н
231
+ - о
232
+ - п
233
+ - р
234
+ - с
235
+ - т
236
+ - у
237
+ - ф
238
+ - х
239
+ - ц
240
+ - ч
241
+ - ш
242
+ - щ
243
+ - ъ
244
+ - ы
245
+ - ь
246
+ - э
247
+ - ю
248
+ - я
249
+ optim:
250
+ name: adamw
251
+ lr: 5.0e-05
252
+ betas:
253
+ - 0.9
254
+ - 0.98
255
+ weight_decay: 0.01
256
+ sched:
257
+ name: CosineAnnealing
258
+ warmup_steps: 10000
259
+ warmup_ratio: null
260
+ min_lr: 1.0e-07
261
+ nemo_version: 1.12.0
262
+ decoding:
263
+ strategy: greedy
264
+ preserve_alignments: null
265
+ compute_timestamps: null
266
+ word_seperator: ' '
267
+ ctc_timestamp_type: all
268
+ batch_dim_index: 0
269
+ greedy:
270
+ preserve_alignments: false
271
+ compute_timestamps: false
GigaAM-CTC/ctc_model_weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6813e52607414d6006ac30a392087cb8d716afce7e0319a38bcb744ba741d2dc
3
+ size 968535213
GigaAM-Emo/emo_model_config.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id2name:
2
+ - 'angry'
3
+ - 'sad'
4
+ - 'neutral'
5
+ - 'positive'
6
+
7
+ feature_extractor:
8
+ _target_: torch.nn.Sequential
9
+ _args_:
10
+ - _target_: torchaudio.transforms.MelSpectrogram
11
+ sample_rate: 16000
12
+ n_fft: 400
13
+ win_length: 400
14
+ hop_length: 160
15
+ n_mels: 64
16
+ - _target_: __main__.SpecScaler
17
+
18
+ encoder:
19
+ _target_: nemo.collections.asr.modules.ConformerEncoder
20
+ feat_in: 64
21
+ feat_out: -1
22
+ n_layers: 16
23
+ d_model: 768
24
+ subsampling: striding
25
+ subsampling_factor: 4
26
+ subsampling_conv_channels: 768
27
+ ff_expansion_factor: 4
28
+ self_attention_model: rel_pos
29
+ pos_emb_max_len: 5000
30
+ n_heads: 16
31
+ xscaling: false
32
+ untie_biases: true
33
+ conv_kernel_size: 31
34
+
35
+ classification_head:
36
+ _target_: torch.nn.Linear
37
+ in_features: 768
38
+ out_features: 4
GigaAM-Emo/emo_model_weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a8530d7573e0f0cd78c48c91345bd67c09a8eb4b15913baab77590140b9ecb0
3
+ size 968409626
GigaAM-RNNT/rnnt_model_config.yaml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_class: enc_dec_rnnt_bpe
2
+ sample_rate: 16000
3
+ log_prediction: true
4
+ model_defaults:
5
+ enc_hidden: 768
6
+ pred_hidden: 320
7
+ join_hidden: 320
8
+
9
+ preprocessor:
10
+ _target_: __main__.AudioToMelSpectrogramPreprocessor
11
+ sample_rate: 16000
12
+ n_fft: 400
13
+ n_window_size: 400
14
+ window_size: null
15
+ n_window_stride: 160
16
+ window_stride: null
17
+ features: 64
18
+ dither: 0.0
19
+ preemph: null
20
+ log: true
21
+ log_zero_guard_type: clamp
22
+ normalize: null
23
+ pad_to: 0
24
+ mel_norm: null
25
+ window: hann
26
+ log_zero_guard_value: 1.0e-09
27
+
28
+ tokenizer:
29
+ dir: tokenizer_all_sets/
30
+ type: bpe
31
+
32
+ validation_ds:
33
+ shuffle: False
34
+ manifest_filepath: null
35
+
36
+ encoder:
37
+ _target_: nemo.collections.asr.modules.ConformerEncoder
38
+ feat_in: 64
39
+ feat_out: -1
40
+ n_layers: 16
41
+ d_model: 768
42
+ subsampling: striding
43
+ subsampling_factor: 4
44
+ subsampling_conv_channels: 768
45
+ ff_expansion_factor: 4
46
+ self_attention_model: rel_pos
47
+ pos_emb_max_len: 5000
48
+ n_heads: 16
49
+ xscaling: false
50
+ untie_biases: true
51
+ conv_kernel_size: 31
52
+ dropout: 0.1
53
+ dropout_emb: 0.1
54
+ dropout_att: 0.1
55
+ decoder:
56
+ _target_: nemo.collections.asr.modules.RNNTDecoder
57
+ normalization_mode: null
58
+ random_state_sampling: false
59
+ blank_as_pad: true
60
+ vocab_size: 512
61
+ prednet:
62
+ pred_hidden: 320
63
+ pred_rnn_layers: 1
64
+ t_max: null
65
+ dropout: 0.0
66
+ joint:
67
+ _target_: nemo.collections.asr.modules.RNNTJoint
68
+ log_softmax: null
69
+ fuse_loss_wer: false
70
+ fused_batch_size: 1
71
+ jointnet:
72
+ joint_hidden: 320
73
+ activation: relu
74
+ dropout: 0.0
75
+ encoder_hidden: 768
76
+ optim:
77
+ name: adamw
78
+ lr: 5.0e-05
79
+ betas:
80
+ - 0.9
81
+ - 0.98
82
+ weight_decay: 0.01
83
+ sched:
84
+ name: CosineAnnealing
85
+ warmup_steps: 10000
86
+ warmup_ratio: null
87
+ min_lr: 1.0e-07
88
+ nemo_version: 1.12.0
89
+ decoding:
90
+ strategy: greedy_batch
91
+ preserve_alignments: false
92
+ greedy:
93
+ max_symbols: 3
94
+ beam:
95
+ beam_size: 5
96
+ score_norm: true
97
+
98
+
99
+ loss:
100
+ loss_name: default
101
+ mwer: false
102
+ rnnt_reduction: mean_batch
103
+ wer_coef: false
104
+ subtract_mean: true
105
+ warprnnt_numba_kwargs:
106
+ fastemit_lambda: 0.0
107
+ clamp: -1.0
108
+ rnnt_weight: 0.1
109
+ unique_hyp: true
GigaAM-RNNT/rnnt_model_weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9311712a085aba1b103c325f4965faa7b32e950bf0b724720103a94d204d2a9
3
+ size 974419733
GigaAM/encoder_config.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature_extractor:
2
+ _target_: torch.nn.Sequential
3
+ _args_:
4
+ - _target_: torchaudio.transforms.MelSpectrogram
5
+ sample_rate: 16000
6
+ n_fft: 400
7
+ win_length: 400
8
+ hop_length: 160
9
+ n_mels: 64
10
+ - _target_: __main__.SpecScaler
11
+
12
+ encoder:
13
+ _target_: nemo.collections.asr.modules.ConformerEncoder
14
+ feat_in: 64
15
+ feat_out: -1
16
+ n_layers: 16
17
+ d_model: 768
18
+ subsampling: striding
19
+ subsampling_factor: 4
20
+ subsampling_conv_channels: 768
21
+ ff_expansion_factor: 4
22
+ self_attention_model: rel_pos
23
+ pos_emb_max_len: 5000
24
+ n_heads: 16
25
+ xscaling: false
26
+ untie_biases: true
27
+ conv_kernel_size: 31
GigaAM/ssl_model_weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fea2e9cee640c931a159667c9f1d82519e789087966ed412c77c0b7e69a35073
3
+ size 968385941
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GigaAM: the family of open-source acoustic models for speech processing
2
+
3
+ ![plot](./gigaam_scheme.svg)
4
+
5
+ ## Table of contents
6
+
7
+ * [GigaAM](#gigaam)
8
+ * [GigaAM for Speech Recognition](#gigaam-for-speech-recognition)
9
+ * [GigaAM-CTC](#gigaam-ctc)
10
+ * [GigaAM-RNNT](#gigaam-rnnt)
11
+ * [GigaAM-Emo](#gigaam-emo)
12
+ * [Links](#links)
13
+
14
+ ## GigaAM
15
+
16
+ GigaAM (**Giga** **A**coustic **M**odel) is a [Conformer](https://arxiv.org/pdf/2005.08100.pdf)-based [wav2vec2](https://arxiv.org/pdf/2006.11477.pdf) foundational model (around 240M parameters). We trained GigaAM on nearly 50 thousand hours of diversified speech audio in the Russian language.
17
+
18
+ Resources:
19
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt)
20
+ * [Encoder config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml)
21
+ * [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Model_Usage_Example.ipynb)
22
+ * [Docker example](./examples/README.md)
23
+
24
+
25
+ ## GigaAM for Speech Recognition
26
+
27
+ We fine-tuned the GigaAM encoder for Speech Recognition with two different decoders:
28
+ * GigaAM-CTC was fine-tuned with [Connectionist Temporal Classification](https://www.cs.toronto.edu/~graves/icml_2006.pdf) and a character-based tokenizer.
29
+ * GigaAM-RNNT was fine-tuned with [RNN Transducer loss](https://arxiv.org/abs/1211.3711) and subword tokenizer.
30
+
31
+ Both models were trained using [the NeMo toolkit](https://github.com/NVIDIA/NeMo) on publicly available Russian labeled data:
32
+
33
+ | dataset | size, hours | weight |
34
+ | --- | --- | --- |
35
+ | [Golos](https://arxiv.org/pdf/2106.10161.pdf) | 1227 | 0.6 |
36
+ | [SOVA](https://github.com/sovaai/sova-dataset) | 369 | 0.2 |
37
+ | [Russian Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | 207 | 0.1 |
38
+ | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) | 93 | 0.1 |
39
+
40
+
41
+ Resources:
42
+ * ### GigaAM-CTC:
43
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_weights.ckpt)
44
+ * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_config.yaml)
45
+ * [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb)
46
+ * [Docker example](./examples/README.md)
47
+ * ### GigaAM-RNNT:
48
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt)
49
+ * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml)
50
+ * [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb)
51
+ * [Docker example](./examples/README.md)
52
+
53
+ The following table summarizes the performance of different models in terms of Word Error Rate on open Russian datasets:
54
+
55
+ | model | parameters | [Golos Crowd](https://arxiv.org/abs/2106.10161) | [Golos Farfield](https://arxiv.org/abs/2106.10161) | [OpenSTT Youtube](https://github.com/snakers4/open_stt) | [OpenSTT Phone calls](https://github.com/snakers4/open_stt) | [OpenSTT Audiobooks](https://github.com/snakers4/open_stt) | [Mozilla Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) |
56
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- |
57
+ | [Whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) | 1.5B | 17.4 | 14.5 | 21.1 | 31.2 | 17.0 | 5.3 | 9.0 |
58
+ | [NVIDIA Ru-FastConformer-RNNT](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc) | 115M | 2.6 | 6.6 | 23.8 | 32.9 | 16.4 | 2.7 | 11.6 |
59
+ | GigaAM-CTC | 242M | 3.1 | 5.7 | 18.4 | 25.6 | 15.1 | 1.7 | 8.1 |
60
+ | GigaAM-RNNT | 243M | <span style="color:green">2.3</span> | <span style="color:green">4.4</span> | <span style="color:green">16.7</span> | <span style="color:green">22.9</span> | <span style="color:green">13.9</span> | <span style="color:green">0.9</span> | <span style="color:green">7.4</span> |
61
+
62
+ ## GigaAM-Emo
63
+
64
+ GigaAM-Emo is an acoustic model for Emotion Recognition. We fine-tuned the GigaAM Encoder on the [Dusha](https://arxiv.org/pdf/2212.12266.pdf) dataset.
65
+
66
+ Resources:
67
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt)
68
+ * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml)
69
+ * [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb)
70
+ * [Docker example](./examples/README.md)
71
+
72
+ The following table summarizes the performance of different models on the [Dusha](https://arxiv.org/pdf/2212.12266.pdf) dataset:
73
+
74
+ | | | Crowd | | | Podcast | |
75
+ | --- | --- | --- | --- | --- | --- | --- |
76
+ | | Unweighted Accuracy | Weighted Accuracy | Macro F1-score | Unweighted Accuracy | Weighted Accuracy | Macro F1-score |
77
+ | [DUSHA](https://arxiv.org/pdf/2212.12266.pdf) baseline <br/> ([MobileNetV2](https://arxiv.org/abs/1801.04381) + [Self-Attention](https://arxiv.org/pdf/1805.08318.pdf)) | 0.83 | 0.76 | 0.77 | 0.89 | 0.53 | 0.54 |
78
+ | [АБК](https://aij.ru/archive?albumId=2&videoId=337) ([TIM-Net](https://arxiv.org/pdf/2211.08233.pdf)) | 0.84 | 0.77 | 0.78 | <span style="color:green">0.90</span> | 0.50 | 0.55 |
79
+ | GigaAM-Emo | <span style="color:green">0.90</span> | <span style="color:green">0.87</span> | <span style="color:green">0.84</span> | <span style="color:green">0.90</span> | <span style="color:green">0.76</span> | <span style="color:green">0.67</span> |
80
+
81
+ ## Links
82
+ * [[habr] GigaAM: класс открытых моделей для обработки звучащей речи](https://habr.com/ru/companies/sberdevices/articles/805569)
83
+ * [[youtube] GigaAM: Семейство акустических моделей для русского языка](https://youtu.be/PvZuTUnZa2Q?t=26442)
84
+ * [[youtube] Speech-only Pre-training: обучение универсального аудиоэнкодера](https://www.youtube.com/watch?v=ktO4Mx6UMNk)
README_ru.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GigaAM: семейство акустических моделей для обработки звучащей речи
2
+
3
+ ![plot](./gigaam_scheme.svg)
4
+
5
+ ## Содержание
6
+
7
+ * [GigaAM](#gigaam)
8
+ * [GigaAM для распознавания речи](#gigaam-для-распознавания-речи)
9
+ * [GigaAM-CTC](#gigaam-ctc)
10
+ * [GigaAM-RNNT](#gigaam-rnnt)
11
+ * [GigaAM-Emo](#gigaam-emo)
12
+ * [Ссылки](#ссылки)
13
+
14
+ ## GigaAM
15
+
16
+ GigaAM (**Giga** **A**coustic **M**odel) — фундаментальная акустическая модель, основанная на [Conformer](https://arxiv.org/pdf/2005.08100.pdf) энкодере (около 240M параметров). Мы предобучали GigaAM в [wav2vec2](https://arxiv.org/pdf/2006.11477.pdf) режиме на 50 тысячах часов разнообразных русскоязычных данных.
17
+
18
+ Материалы:
19
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt)
20
+ * [Encoder config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml)
21
+ * [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Model_Usage_Example.ipynb)
22
+ * [Пример использования в docker](./examples/README.md)
23
+
24
+
25
+ ## GigaAM для распознавания речи
26
+ Мы дообучали GigaAM энкодер для задачи распознавания речи с двумя разными декодерами:
27
+ * GigaAM-CTC была дообучена с [CTC](https://www.cs.toronto.edu/~graves/icml_2006.pdf) функцией потерь и посимвольной токенизацией.
28
+ * GigaAM-RNNT была дообучена с [RNN-T](https://arxiv.org/abs/1211.3711) функцией потерь и subword-токенизацией.
29
+
30
+ Для обучения обеих моделей использовался [фреймворк NeMo](https://github.com/NVIDIA/NeMo) и следующие открытые данные:
31
+
32
+ | dataset | size, hours | weight |
33
+ | --- | --- | --- |
34
+ | [Golos](https://arxiv.org/pdf/2106.10161.pdf) | 1227 | 0.6 |
35
+ | [SOVA](https://github.com/sovaai/sova-dataset) | 369 | 0.2 |
36
+ | [Russian Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | 207 | 0.1 |
37
+ | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) | 93 | 0.1 |
38
+
39
+ Материалы:
40
+ * ### GigaAM-CTC:
41
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_weights.ckpt)
42
+ * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_config.yaml)
43
+ * [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb)
44
+ * [Пример использования в docker](./examples/README.md)
45
+ * ### GigaAM-RNNT:
46
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt)
47
+ * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml)
48
+ * [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb)
49
+ * [Пример использования в docker](./examples/README.md)
50
+
51
+ В таблице ниже приведены оценки Word Error Rate различных моделей на открытых русскоязычных наборах данных:
52
+
53
+ | model | parameters | [Golos Crowd](https://arxiv.org/abs/2106.10161) | [Golos Farfield](https://arxiv.org/abs/2106.10161) | [OpenSTT Youtube](https://github.com/snakers4/open_stt) | [OpenSTT Phone calls](https://github.com/snakers4/open_stt) | [OpenSTT Audiobooks](https://github.com/snakers4/open_stt) | [Mozilla Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) |
54
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- |
55
+ | [Whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) | 1.5B | 17.4 | 14.5 | 21.1 | 31.2 | 17.0 | 5.3 | 9.0 |
56
+ | [NVIDIA Ru-FastConformer-RNNT](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc) | 115M | 2.6 | 6.6 | 23.8 | 32.9 | 16.4 | 2.7 | 11.6 |
57
+ | GigaAM-CTC | 242M | 3.1 | 5.7 | 18.4 | 25.6 | 15.1 | 1.7 | 8.1 |
58
+ | GigaAM-RNNT | 243M | <span style="color:green">2.3</span> | <span style="color:green">4.4</span> | <span style="color:green">16.7</span> | <span style="color:green">22.9</span> | <span style="color:green">13.9</span> | <span style="color:green">0.9</span> | <span style="color:green">7.4</span> |
59
+
60
+ ## GigaAM-Emo
61
+
62
+ GigaAM-Emo — акустическая модель для определения эмоций. Мы доучивали GigaAM на датасете [Dusha](https://arxiv.org/pdf/2212.12266.pdf).
63
+
64
+ Материалы:
65
+ * [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt)
66
+ * [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml)
67
+ * [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb)
68
+ * [Пример использования в docker](./examples/README.md)
69
+
70
+
71
+ В таблице ниже приведены метрики качества открытых моделей на датасете [Dusha](https://arxiv.org/pdf/2212.12266.pdf):
72
+
73
+ | | | Crowd | | | Podcast | |
74
+ | --- | --- | --- | --- | --- | --- | --- |
75
+ | | Unweighted Accuracy | Weighted Accuracy | Macro F1-score | Unweighted Accuracy | Weighted Accuracy | Macro F1-score |
76
+ | [DUSHA](https://arxiv.org/pdf/2212.12266.pdf) baseline <br/> ([MobileNetV2](https://arxiv.org/abs/1801.04381) + [Self-Attention](https://arxiv.org/pdf/1805.08318.pdf)) | 0.83 | 0.76 | 0.77 | 0.89 | 0.53 | 0.54 |
77
+ | [АБК](https://aij.ru/archive?albumId=2&videoId=337) ([TIM-Net](https://arxiv.org/pdf/2211.08233.pdf)) | 0.84 | 0.77 | 0.78 | <span style="color:green">0.90</span> | 0.50 | 0.55 |
78
+ | GigaAM-Emo | <span style="color:green">0.90</span> | <span style="color:green">0.87</span> | <span style="color:green">0.84</span> | <span style="color:green">0.90</span> | <span style="color:green">0.76</span> | <span style="color:green">0.67</span> |
79
+
80
+ ## Ссылки
81
+ * [[habr] GigaAM: класс открытых моделей для обработки звучащей речи](https://habr.com/ru/companies/sberdevices/articles/805569)
82
+ * [[youtube] GigaAM: Семейство акустических моделей для русского языка](https://youtu.be/PvZuTUnZa2Q?t=26442)
83
+ * [[youtube] Speech-only Pre-training: обучение универсального аудиоэнкодера](https://www.youtube.com/watch?v=ktO4Mx6UMNk)
gigaam_scheme.svg ADDED