GigaAM
Browse files- .gitattributes +35 -35
- Examples/Dockerfile +13 -0
- Examples/README.md +126 -0
- Examples/ctc_inference.py +83 -0
- Examples/ctc_longform_inference.py +201 -0
- Examples/emo_inference.py +81 -0
- Examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb +0 -0
- Examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb +955 -0
- Examples/notebooks/GigaAM_Model_Usage_Example.ipynb +881 -0
- Examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb +0 -0
- Examples/rnnt_inference.py +98 -0
- Examples/rnnt_longform_inference.py +210 -0
- Examples/ssl_inference.py +55 -0
- GigaAM-CTC/ctc_model_config.yaml +271 -0
- GigaAM-CTC/ctc_model_weights.ckpt +3 -0
- GigaAM-Emo/emo_model_config.yaml +38 -0
- GigaAM-Emo/emo_model_weights.ckpt +3 -0
- GigaAM-RNNT/rnnt_model_config.yaml +109 -0
- GigaAM-RNNT/rnnt_model_weights.ckpt +3 -0
- GigaAM/encoder_config.yaml +27 -0
- GigaAM/ssl_model_weights.ckpt +3 -0
- README.md +84 -0
- README_ru.md +83 -0
- gigaam_scheme.svg +0 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Examples/Dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Example image for running the GigaAM inference scripts on top of NVIDIA NeMo.
FROM nvcr.io/nvidia/nemo:23.10

# WORKDIR creates the directory when it does not exist, so no separate mkdir is needed.
WORKDIR /workspace/data

# Fetch checkpoints, configs, tokenizer and example audio, then unpack the
# tokenizer and delete the archive in the SAME layer so the tarball does not
# persist in the image.
# A plain POSIX loop replaces `{a,b,...}` brace expansion: that is a bash
# feature and is not guaranteed under the default `/bin/sh -c` RUN shell.
RUN set -eu; \
    base_url="https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM"; \
    for file in \
        ssl_model_weights.ckpt \
        emo_model_weights.ckpt \
        ctc_model_weights.ckpt \
        rnnt_model_weights.ckpt \
        ctc_model_config.yaml \
        emo_model_config.yaml \
        encoder_config.yaml \
        rnnt_model_config.yaml \
        tokenizer_all_sets.tar \
        example.wav \
        long_example.wav \
    ; do \
        wget "${base_url}/${file}"; \
    done; \
    tar -xf tokenizer_all_sets.tar; \
    rm tokenizer_all_sets.tar

# Python dependencies. The install order is kept from the original recipe;
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN pip install --no-cache-dir Cython
# The requirement is quoted so the shell never treats `[all]` as a glob pattern.
RUN pip install --no-cache-dir "git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
RUN pip install --no-cache-dir -U soundfile
RUN pip install --no-cache-dir pyannote.audio==3.2.0
|
Examples/README.md
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
* [Virtual environment](#virtual-environment)
|
| 2 |
+
* [Docker](#docker)
|
| 3 |
+
* For long-form inference:
|
| 4 |
+
* generate [Hugging Face API token](https://huggingface.co/docs/hub/security-tokens)
|
| 5 |
+
* accept the conditions to access [pyannote/voice-activity-detection](https://huggingface.co/pyannote/voice-activity-detection) files and content
|
| 6 |
+
* accept the conditions to access [pyannote/segmentation](https://huggingface.co/pyannote/segmentation) files and content
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
## Virtual environment
|
| 10 |
+
```bash
|
| 11 |
+
apt install python3-dev
|
| 12 |
+
apt install python3-venv
|
| 13 |
+
apt install ffmpeg libavcodec-extra
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
```bash
|
| 17 |
+
python3.10 -m venv venv && . venv/bin/activate
|
| 18 |
+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
| 19 |
+
pip install Cython
|
| 20 |
+
pip install -U wheel
|
| 21 |
+
pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]
|
| 22 |
+
pip install pyannote.audio==3.2.0
|
| 23 |
+
mkdir ./data
|
| 24 |
+
wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/{ssl_model_weights.ckpt,emo_model_weights.ckpt,ctc_model_weights.ckpt,rnnt_model_weights.ckpt,ctc_model_config.yaml,emo_model_config.yaml,encoder_config.yaml,rnnt_model_config.yaml,tokenizer_all_sets.tar,example.wav,long_example.wav} -P ./data && tar -xf ./data/tokenizer_all_sets.tar --directory ./data/ && rm ./data/tokenizer_all_sets.tar
|
| 25 |
+
|
| 26 |
+
# GigaAM
|
| 27 |
+
python ssl_inference.py --encoder_config ./data/encoder_config.yaml \
|
| 28 |
+
--model_weights ./data/ssl_model_weights.ckpt --device cuda --audio_path ./data/example.wav
|
| 29 |
+
|
| 30 |
+
# encoded signal shape: torch.Size([1, 768, 283])
|
| 31 |
+
|
| 32 |
+
# GigaAM-CTC
|
| 33 |
+
python ctc_inference.py --model_config ./data/ctc_model_config.yaml \
|
| 34 |
+
--model_weights ./data/ctc_model_weights.ckpt --device cuda --audio_path ./data/example.wav
|
| 35 |
+
|
| 36 |
+
# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
|
| 37 |
+
|
| 38 |
+
# GigaAM-CTC long-form
|
| 39 |
+
python ctc_longform_inference.py --model_config ./data/ctc_model_config.yaml \
|
| 40 |
+
--model_weights ./data/ctc_model_weights.ckpt --device cuda \
|
| 41 |
+
--audio_path ./data/long_example.wav --hf_token <YOUR_HF_TOKEN>
|
| 42 |
+
|
| 43 |
+
# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам эгумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
|
| 44 |
+
# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темную живопись икон и возлощенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вашине старинный свод
|
| 45 |
+
# ...
|
| 46 |
+
|
| 47 |
+
# GigaAM-RNNT
|
| 48 |
+
python rnnt_inference.py --model_config ./data/rnnt_model_config.yaml \
|
| 49 |
+
--model_weights ./data/rnnt_model_weights.ckpt --tokenizer_path ./data/tokenizer_all_sets \
|
| 50 |
+
--device cuda --audio_path ./data/example.wav
|
| 51 |
+
|
| 52 |
+
# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
|
| 53 |
+
|
| 54 |
+
# GigaAM-RNNT long-form
|
| 55 |
+
python rnnt_longform_inference.py --model_config ./data/rnnt_model_config.yaml \
|
| 56 |
+
--model_weights ./data/rnnt_model_weights.ckpt --tokenizer_path ./data/tokenizer_all_sets \
|
| 57 |
+
--device cuda --audio_path ./data/long_example.wav --hf_token <YOUR_HF_TOKEN>
|
| 58 |
+
|
| 59 |
+
# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам игумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
|
| 60 |
+
# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темну живопись икон и возлащенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вышине старинный свод
|
| 61 |
+
# ...
|
| 62 |
+
|
| 63 |
+
# GigaAM-Emo
|
| 64 |
+
python emo_inference.py --model_config ./data/emo_model_config.yaml \
|
| 65 |
+
--model_weights ./data/emo_model_weights.ckpt --device cuda --audio_path ./data/example.wav
|
| 66 |
+
|
| 67 |
+
# angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
## Docker
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
docker build -t gigaam_image .
|
| 74 |
+
|
| 75 |
+
# GigaAM
|
| 76 |
+
docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
|
| 77 |
+
python /workspace/gigaam/ssl_inference.py --encoder_config /workspace/data/encoder_config.yaml \
|
| 78 |
+
--model_weights /workspace/data/ssl_model_weights.ckpt \
|
| 79 |
+
--device cuda --audio_path /workspace/data/example.wav
|
| 80 |
+
|
| 81 |
+
# encoded signal shape: torch.Size([1, 768, 283])
|
| 82 |
+
|
| 83 |
+
# GigaAM-CTC
|
| 84 |
+
docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
|
| 85 |
+
python /workspace/gigaam/ctc_inference.py --model_config /workspace/data/ctc_model_config.yaml \
|
| 86 |
+
--model_weights /workspace/data/ctc_model_weights.ckpt \
|
| 87 |
+
--device cuda --audio_path /workspace/data/example.wav
|
| 88 |
+
|
| 89 |
+
# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
|
| 90 |
+
|
| 91 |
+
# GigaAM-CTC long-form
|
| 92 |
+
docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
|
| 93 |
+
python /workspace/gigaam/ctc_longform_inference.py --model_config /workspace/data/ctc_model_config.yaml \
|
| 94 |
+
--model_weights /workspace/data/ctc_model_weights.ckpt --device cuda \
|
| 95 |
+
--audio_path /workspace/data/long_example.wav --hf_token <YOUR_HF_TOKEN>
|
| 96 |
+
|
| 97 |
+
# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам эгумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
|
| 98 |
+
# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темную живопись икон и возлощенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вашине старинный свод
|
| 99 |
+
# ...
|
| 100 |
+
|
| 101 |
+
# GigaAM-RNNT
|
| 102 |
+
docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
|
| 103 |
+
python /workspace/gigaam/rnnt_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
|
| 104 |
+
--model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
|
| 105 |
+
--device cuda --audio_path /workspace/data/example.wav
|
| 106 |
+
|
| 107 |
+
# transcription: ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый
|
| 108 |
+
|
| 109 |
+
# GigaAM-RNNT long-form
|
| 110 |
+
docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
|
| 111 |
+
python /workspace/gigaam/rnnt_longform_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
|
| 112 |
+
--model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
|
| 113 |
+
--device cuda --audio_path /workspace/data/long_example.wav --hf_token <YOUR_HF_TOKEN>
|
| 114 |
+
|
| 115 |
+
# [00:00:00 - 00:16:83]: вечерня отошла давно но в кельях тихо и темно уже и сам игумин строгий свои молитвы прекратил и кости ветхие склонил перекрестясь на одр убогий кругом и сон и тишина но церкви дверь отворена
|
| 116 |
+
# [00:17:10 - 00:32:61]: трепещет луч лампады и тускло озаряет он и темну живопись икон и возлащенные оклады и раздается в тишине то тяжкий вздох то шепот важный и мрачно дремлет в вышине старинный свод
|
| 117 |
+
# ...
|
| 118 |
+
|
| 119 |
+
# GigaAM-Emo
|
| 120 |
+
docker run -v $PWD:/workspace/gigaam --gpus all gigaam_image \
|
| 121 |
+
python /workspace/gigaam/emo_inference.py --model_config /workspace/data/emo_model_config.yaml \
|
| 122 |
+
--model_weights /workspace/data/emo_model_weights.ckpt \
|
| 123 |
+
--device cuda --audio_path /workspace/data/example.wav
|
| 124 |
+
|
| 125 |
+
# angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074
|
| 126 |
+
```
|
Examples/ctc_inference.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torchaudio
|
| 5 |
+
from nemo.collections.asr.models import EncDecCTCModel
|
| 6 |
+
from nemo.collections.asr.modules.audio_preprocessing import (
|
| 7 |
+
AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
|
| 8 |
+
)
|
| 9 |
+
from nemo.collections.asr.parts.preprocessing.features import (
|
| 10 |
+
FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo torchaudio filterbank featurizer with a selectable mel scale.

    After the parent constructor runs, the mel-spectrogram extractor is
    replaced by a ``torchaudio.transforms.MelSpectrogram`` built with the
    requested ``mel_scale`` and optional window keyword arguments.

    Args:
        mel_scale: Mel scale name passed to torchaudio (default ``"htk"``).
        wkwargs: Optional keyword arguments forwarded to the window function.
        **kwargs: Arguments for the parent featurizer. ``nfilt``, ``window``,
            ``mel_norm`` and ``n_fft`` must be present; ``highfreq`` and
            ``lowfreq`` are optional.
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # window_size / window_stride are not forwarded to the parent constructor.
        for dropped in ("window_size", "window_stride"):
            kwargs.pop(dropped, None)

        super().__init__(**kwargs)

        mel_options = dict(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        # Override the extractor created by the parent class.
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(**mel_options)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses the local FilterbankFeaturesTA.

    Args:
        mel_scale: Mel scale name handed to the featurizer (default ``"htk"``).
        **kwargs: Arguments for the parent preprocessor; ``features`` is required
            and is passed to the featurizer under the name ``nfilt``.
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)

        # The featurizer expects the number of mel bins as `nfilt`, not `features`.
        kwargs["nfilt"] = kwargs.pop("features")

        # kwargs may still carry deprecated arguments kept for config
        # compatibility (per the original inline note).
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _parse_args():
|
| 52 |
+
parser = argparse.ArgumentParser(
|
| 53 |
+
description="Run inference using GigaAM-CTC checkpoint"
|
| 54 |
+
)
|
| 55 |
+
parser.add_argument("--model_config", help="Path to GigaAM-CTC config file (.yaml)")
|
| 56 |
+
parser.add_argument(
|
| 57 |
+
"--model_weights", help="Path to GigaAM-CTC checkpoint file (.ckpt)"
|
| 58 |
+
)
|
| 59 |
+
parser.add_argument("--audio_path", help="Path to audio signal")
|
| 60 |
+
parser.add_argument("--device", help="Device: cpu / cuda")
|
| 61 |
+
return parser.parse_args()
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def main(model_config: str, model_weights: str, device: str, audio_path: str):
    """Load a GigaAM-CTC model and print the transcription of one audio file.

    Args:
        model_config: Path to the model config file.
        model_weights: Path to the checkpoint loaded with ``torch.load``.
        device: Device the model is moved to.
        audio_path: Path to the audio file to transcribe.
    """
    asr_model = EncDecCTCModel.from_config_file(model_config)

    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    state_dict = torch.load(model_weights, map_location="cpu")
    # strict=False: the checkpoint keys need not match the model exactly.
    asr_model.load_state_dict(state_dict, strict=False)
    asr_model = asr_model.to(device)
    asr_model.eval()

    transcription = asr_model.transcribe([audio_path])[0]
    print(f"transcription: {transcription}")
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
if __name__ == "__main__":
    # Script entry point: forward every parsed CLI option to main() by keyword.
    cli_args = _parse_args()
    main(
        model_config=cli_args.model_config,
        model_weights=cli_args.model_weights,
        device=cli_args.device,
        audio_path=cli_args.audio_path,
    )
|
Examples/ctc_longform_inference.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
from typing import List, Tuple
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
import torchaudio
|
| 8 |
+
from nemo.collections.asr.models import EncDecCTCModel
|
| 9 |
+
from nemo.collections.asr.modules.audio_preprocessing import (
|
| 10 |
+
AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
|
| 11 |
+
)
|
| 12 |
+
from nemo.collections.asr.parts.preprocessing.features import (
|
| 13 |
+
FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
|
| 14 |
+
)
|
| 15 |
+
from pyannote.audio import Pipeline
|
| 16 |
+
from pydub import AudioSegment
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo torchaudio filterbank featurizer with a selectable mel scale.

    After the parent constructor runs, the mel-spectrogram extractor is
    replaced by a ``torchaudio.transforms.MelSpectrogram`` built with the
    requested ``mel_scale`` and optional window keyword arguments.

    Args:
        mel_scale: Mel scale name passed to torchaudio (default ``"htk"``).
        wkwargs: Optional keyword arguments forwarded to the window function.
        **kwargs: Arguments for the parent featurizer. ``nfilt``, ``window``,
            ``mel_norm`` and ``n_fft`` must be present; ``highfreq`` and
            ``lowfreq`` are optional.
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # window_size / window_stride are not forwarded to the parent constructor.
        for dropped in ("window_size", "window_stride"):
            kwargs.pop(dropped, None)

        super().__init__(**kwargs)

        mel_options = dict(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        # Override the extractor created by the parent class.
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(**mel_options)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses the local FilterbankFeaturesTA.

    Args:
        mel_scale: Mel scale name handed to the featurizer (default ``"htk"``).
        **kwargs: Arguments for the parent preprocessor; ``features`` is required
            and is passed to the featurizer under the name ``nfilt``.
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)

        # The featurizer expects the number of mel bins as `nfilt`, not `features`.
        kwargs["nfilt"] = kwargs.pop("features")

        # kwargs may still carry deprecated arguments kept for config
        # compatibility (per the original inline note).
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def audiosegment_to_numpy(audiosegment: AudioSegment) -> np.ndarray:
    """Convert an AudioSegment to a float32 numpy array scaled to [-1.0, 1.0).

    The integer samples are divided by the full-scale value of the segment's
    own sample width (32768 for 16-bit audio), so 8-bit and 32-bit segments
    are normalized correctly as well.

    Args:
        audiosegment: Segment providing ``get_array_of_samples()``,
            ``channels`` and ``sample_width`` (bytes per sample).

    Returns:
        A C-ordered float32 array: shape ``(num_samples,)`` for mono input or
        ``(num_frames, channels)`` for multi-channel input.
    """
    samples = np.array(audiosegment.get_array_of_samples())

    # Samples of multi-channel audio are interleaved; give each channel a column.
    channels = audiosegment.channels
    if channels > 1:
        samples = samples.reshape((-1, channels))

    # Full-scale magnitude of a signed integer with `sample_width` bytes.
    full_scale = float(1 << (8 * audiosegment.sample_width - 1))
    return samples.astype(np.float32, order="C") / full_scale
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def format_time(seconds: float) -> str:
    """Format a duration in seconds as ``[HH:]MM:SS:hh`` (hh = hundredths).

    The value is rounded to the nearest hundredth of a second before it is
    split into fields. This avoids losing a hundredth to binary floating-point
    error (0.29 s is rendered as ``29``, not ``28``). The hours field is
    emitted only when it is non-zero.

    Args:
        seconds: Non-negative duration in seconds.

    Returns:
        The formatted timestamp string.
    """
    total_hundredths = int(round(seconds * 100))
    total_seconds, hundredths = divmod(total_hundredths, 100)
    total_minutes, full_seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    if hours > 0:
        return f"{hours:02}:{minutes:02}:{full_seconds:02}:{hundredths:02}"
    return f"{minutes:02}:{full_seconds:02}:{hundredths:02}"
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def segment_audio(
    audio_path: str,
    pipeline: Pipeline,
    max_duration: float = 22.0,
    min_duration: float = 15.0,
    new_chunk_threshold: float = 0.2,
) -> Tuple[List[np.ndarray], List[List[float]]]:
    """Split a WAV file into speech chunks for ASR using a VAD pipeline.

    Speech regions returned by ``pipeline`` are merged into chunks. A chunk is
    closed when it is already longer than ``min_duration`` and the pause before
    the next region exceeds ``new_chunk_threshold``, or when adding the next
    region would make it longer than ``max_duration``.

    Args:
        audio_path: Path to the WAV file.
        pipeline: Callable VAD pipeline; its result must provide
            ``get_timeline().support()`` yielding segments with ``start``/``end``.
        max_duration: Upper bound (seconds) used when extending a chunk.
        min_duration: Minimal chunk length (seconds) before a pause may split it.
        new_chunk_threshold: Pause length (seconds) that allows a split.

    Returns:
        ``(segments, boundaries)``: the chunk waveforms as numpy arrays and the
        matching ``[start, end]`` times in seconds.
    """
    # Re-encode the audio into an in-memory WAV for the VAD pipeline.
    audio = AudioSegment.from_wav(audio_path)
    audio_bytes = BytesIO()
    audio.export(audio_bytes, format="wav")
    audio_bytes.seek(0)

    # Regions with detected speech activity.
    sad_segments = pipeline({"uri": "filename", "audio": audio_bytes})

    segments: List[np.ndarray] = []
    boundaries: List[List[float]] = []

    def _flush(chunk_start: float, chunk_end: float) -> None:
        """Store the chunk [chunk_start, chunk_end); zero-length chunks are skipped."""
        if chunk_end <= chunk_start:
            # Happens e.g. when the very first speech region is already longer
            # than max_duration: nothing has been accumulated yet.
            return
        # AudioSegment slicing is done in milliseconds.
        segments.append(
            audiosegment_to_numpy(audio[chunk_start * 1000 : chunk_end * 1000])
        )
        boundaries.append([chunk_start, chunk_end])

    audio_length = len(audio) / 1000  # len(audio) is divided by 1000 to get seconds
    curr_start = 0
    curr_end = 0
    curr_duration = 0

    for segment in sad_segments.get_timeline().support():
        # Clip the region to the audio extent.
        start = max(0, segment.start)
        end = min(audio_length, segment.end)

        split_on_pause = (
            curr_duration > min_duration and start - curr_end > new_chunk_threshold
        )
        too_long = curr_duration + (end - curr_end) > max_duration
        if split_on_pause or too_long:
            _flush(curr_start, curr_end)
            curr_start = start

        curr_end = end
        curr_duration = curr_end - curr_start

    # Emit whatever is left after the last region.
    _flush(curr_start, curr_end)

    return segments, boundaries
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def _parse_args():
|
| 129 |
+
parser = argparse.ArgumentParser(
|
| 130 |
+
description="Run long-form inference using GigaAM-CTC checkpoint"
|
| 131 |
+
)
|
| 132 |
+
parser.add_argument("--model_config", help="Path to GigaAM-CTC config file (.yaml)")
|
| 133 |
+
parser.add_argument(
|
| 134 |
+
"--model_weights", help="Path to GigaAM-CTC checkpoint file (.ckpt)"
|
| 135 |
+
)
|
| 136 |
+
parser.add_argument("--audio_path", help="Path to audio signal")
|
| 137 |
+
parser.add_argument(
|
| 138 |
+
"--hf_token", help="HuggingFace token for using pyannote Pipeline"
|
| 139 |
+
)
|
| 140 |
+
parser.add_argument("--device", help="Device: cpu / cuda")
|
| 141 |
+
parser.add_argument("--fp16", help="Run in FP16 mode", default=True)
|
| 142 |
+
parser.add_argument(
|
| 143 |
+
"--batch_size", help="Batch size for acoustic model inference", default=10
|
| 144 |
+
)
|
| 145 |
+
return parser.parse_args()
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def main(
    model_config: str,
    model_weights: str,
    device: str,
    audio_path: str,
    hf_token: str,
    fp16: bool,
    batch_size: int = 10,
):
    """Transcribe a long recording chunk by chunk and print timed results.

    Args:
        model_config: Path to the GigaAM-CTC config file.
        model_weights: Path to the checkpoint loaded with ``torch.load``.
        device: Device for both the ASR model and the VAD pipeline.
        audio_path: Path to the audio file.
        hf_token: Token passed to ``Pipeline.from_pretrained``.
        fp16: Run the model in half precision when ``device`` is not ``"cpu"``.
        batch_size: Batch size passed to ``model.transcribe``.
    """
    half_precision = device != "cpu" and fp16

    # Build the acoustic model and load its weights.
    model = EncDecCTCModel.from_config_file(model_config)
    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    state_dict = torch.load(model_weights, map_location="cpu")
    model.load_state_dict(state_dict, strict=False)
    model = model.to(device)
    if half_precision:
        model = model.half()
        # The preprocessor is kept in float32 even in FP16 mode.
        model.preprocessor = model.preprocessor.float()
    model.eval()

    # Voice-activity-detection pipeline used to cut the recording into chunks.
    pipeline = Pipeline.from_pretrained(
        "pyannote/voice-activity-detection", use_auth_token=hf_token
    )
    pipeline = pipeline.to(torch.device(device))

    segments, boundaries = segment_audio(audio_path, pipeline)

    # Run ASR on all chunks, under autocast when half precision is active.
    if half_precision:
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            transcriptions = model.transcribe(segments, batch_size=batch_size)
    else:
        transcriptions = model.transcribe(segments, batch_size=batch_size)

    for boundary, transcription in zip(boundaries, transcriptions):
        chunk_from = format_time(boundary[0])
        chunk_to = format_time(boundary[1])
        print(f"[{chunk_from} - {chunk_to}]: {transcription}\n")
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
if __name__ == "__main__":
    # Script entry point: forward every parsed CLI option to main() by keyword.
    cli_args = _parse_args()
    main(
        model_config=cli_args.model_config,
        model_weights=cli_args.model_weights,
        device=cli_args.device,
        audio_path=cli_args.audio_path,
        hf_token=cli_args.hf_token,
        fp16=cli_args.fp16,
        batch_size=cli_args.batch_size,
    )
|
Examples/emo_inference.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
from typing import List, Union
|
| 3 |
+
|
| 4 |
+
import hydra
|
| 5 |
+
import soundfile
|
| 6 |
+
import torch
|
| 7 |
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class SpecScaler(torch.nn.Module):
    """Log-scale a spectrogram after clamping it to [1e-9, 1e9]."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``log(clamp(x, 1e-9, 1e9))`` without modifying ``x``.

        The clamp is done out of place so the caller's tensor is left intact;
        the returned values are the same as with an in-place clamp.
        """
        return torch.log(x.clamp(1e-9, 1e9))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class GigaAMEmo(torch.nn.Module):
    """Emotion classifier: feature extractor -> conformer encoder -> linear head.

    All three sub-modules are instantiated with hydra from the given config.
    """

    def __init__(self, conf: Union[DictConfig, ListConfig]):
        """Build the model from ``conf``.

        Args:
            conf: Config providing ``id2name``, ``feature_extractor``,
                ``encoder`` and ``classification_head`` entries.
        """
        super().__init__()
        self.id2name = conf.id2name
        self.feature_extractor = hydra.utils.instantiate(conf.feature_extractor)
        self.conformer = hydra.utils.instantiate(conf.encoder)
        self.linear_head = hydra.utils.instantiate(conf.classification_head)

    @property
    def device(self):
        """Device of the first model parameter."""
        return next(self.parameters()).device

    def forward(self, features, features_length=None):
        """Compute class logits for a batch of feature sequences.

        Args:
            features: Feature tensor; a 2-D input gets a leading batch axis.
            features_length: Optional per-item lengths. When ``None`` every item
                is assumed to span the full last dimension of ``features``.

        Returns:
            Logits produced by the classification head.
        """
        if features.dim() == 2:
            features = features.unsqueeze(0)
        # `is None` instead of truthiness: `not tensor` raises for tensors with
        # more than one element, which made explicit batch lengths unusable.
        if features_length is None:
            features_length = (
                torch.ones(features.shape[0], device=self.device) * features.shape[-1]
            )
        encoded, _ = self.conformer(audio_signal=features, length=features_length)
        # Average over the whole last dimension of the encoder output.
        encoded_pooled = torch.nn.functional.avg_pool1d(
            encoded, kernel_size=encoded.shape[-1]
        ).squeeze(-1)

        logits = self.linear_head(encoded_pooled)
        return logits

    def get_probs(self, audio_path: str) -> List[List[float]]:
        """Read an audio file and return softmax class probabilities.

        Args:
            audio_path: Path to an audio file readable by ``soundfile``.

        Returns:
            Nested list of probabilities, one inner list per batch item.
        """
        audio_signal, _ = soundfile.read(audio_path, dtype="float32")
        audio_tensor = torch.tensor(audio_signal).float().to(self.device)
        features = self.feature_extractor(audio_tensor)
        logits = self.forward(features)
        probs = torch.nn.functional.softmax(logits, dim=1).detach().tolist()
        return probs
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _parse_args():
|
| 50 |
+
parser = argparse.ArgumentParser(
|
| 51 |
+
description="Run inference using GigaAM-Emo checkpoint"
|
| 52 |
+
)
|
| 53 |
+
parser.add_argument("--model_config", help="Path to GigaAM-Emo config file (.yaml)")
|
| 54 |
+
parser.add_argument(
|
| 55 |
+
"--model_weights", help="Path to GigaAM-Emo checkpoint file (.ckpt)"
|
| 56 |
+
)
|
| 57 |
+
parser.add_argument("--audio_path", help="Path to audio signal")
|
| 58 |
+
parser.add_argument("--device", help="Device: cpu / cuda")
|
| 59 |
+
return parser.parse_args()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def main(model_config: str, model_weights: str, device: str, audio_path: str):
    """Load a GigaAM-Emo model and print class probabilities for one audio file.

    Args:
        model_config: Path to the config loaded with ``OmegaConf.load``.
        model_weights: Path to the checkpoint loaded with ``torch.load``.
        device: Device the model is moved to.
        audio_path: Path to the audio file to classify.
    """
    model = GigaAMEmo(OmegaConf.load(model_config))

    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    weights = torch.load(model_weights, map_location="cpu")
    model.load_state_dict(weights, strict=False)
    model = model.to(device)
    model.eval()

    with torch.no_grad():
        probs = model.get_probs(audio_path)[0]

    # One "<name>: <probability>" entry per class, in class-index order.
    report = [f"{model.id2name[i]}: {p:.3f}" for i, p in enumerate(probs)]
    print(", ".join(report))
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
if __name__ == "__main__":
    # Script entry point: forward every parsed CLI option to main() by keyword.
    cli_args = _parse_args()
    main(
        model_config=cli_args.model_config,
        model_weights=cli_args.model_weights,
        device=cli_args.device,
        audio_path=cli_args.audio_path,
    )
|
Examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb
ADDED
|
@@ -0,0 +1,955 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {
|
| 6 |
+
"id": "oREzT-effoFr"
|
| 7 |
+
},
|
| 8 |
+
"source": [
|
| 9 |
+
"### Installing and importing dependencies"
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": null,
|
| 15 |
+
"metadata": {
|
| 16 |
+
"colab": {
|
| 17 |
+
"base_uri": "https://localhost:8080/"
|
| 18 |
+
},
|
| 19 |
+
"id": "yxU1SOPfWxab",
|
| 20 |
+
"outputId": "e9b2c73a-d3d4-4ba9-8ce1-3527c95c4d3f"
|
| 21 |
+
},
|
| 22 |
+
"outputs": [
|
| 23 |
+
{
|
| 24 |
+
"name": "stdout",
|
| 25 |
+
"output_type": "stream",
|
| 26 |
+
"text": [
|
| 27 |
+
"Collecting wget\n",
|
| 28 |
+
" Downloading wget-3.2.zip (10 kB)\n",
|
| 29 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 30 |
+
"Building wheels for collected packages: wget\n",
|
| 31 |
+
" Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 32 |
+
" Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=2e82f0e3a185ee764cf0a1eef86b3f525139a342d3630e878d05860de80d6dee\n",
|
| 33 |
+
" Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n",
|
| 34 |
+
"Successfully built wget\n",
|
| 35 |
+
"Installing collected packages: wget\n",
|
| 36 |
+
"Successfully installed wget-3.2\n",
|
| 37 |
+
"Reading package lists... Done\n",
|
| 38 |
+
"Building dependency tree... Done\n",
|
| 39 |
+
"Reading state information... Done\n",
|
| 40 |
+
"libsndfile1 is already the newest version (1.0.31-2ubuntu0.1).\n",
|
| 41 |
+
"ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n",
|
| 42 |
+
"The following additional packages will be installed:\n",
|
| 43 |
+
" libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1\n",
|
| 44 |
+
"Suggested packages:\n",
|
| 45 |
+
" libsox-fmt-all\n",
|
| 46 |
+
"The following NEW packages will be installed:\n",
|
| 47 |
+
" libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1 sox\n",
|
| 48 |
+
"0 upgraded, 7 newly installed, 0 to remove and 45 not upgraded.\n",
|
| 49 |
+
"Need to get 617 kB of archives.\n",
|
| 50 |
+
"After this operation, 1,764 kB of additional disk space will be used.\n",
|
| 51 |
+
"Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n",
|
| 52 |
+
"Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n",
|
| 53 |
+
"Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [240 kB]\n",
|
| 54 |
+
"Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [11.2 kB]\n",
|
| 55 |
+
"Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n",
|
| 56 |
+
"Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [33.7 kB]\n",
|
| 57 |
+
"Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [104 kB]\n",
|
| 58 |
+
"Fetched 617 kB in 0s (2,444 kB/s)\n",
|
| 59 |
+
"Selecting previously unselected package libopencore-amrnb0:amd64.\n",
|
| 60 |
+
"(Reading database ... 121918 files and directories currently installed.)\n",
|
| 61 |
+
"Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n",
|
| 62 |
+
"Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
|
| 63 |
+
"Selecting previously unselected package libopencore-amrwb0:amd64.\n",
|
| 64 |
+
"Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n",
|
| 65 |
+
"Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
|
| 66 |
+
"Selecting previously unselected package libsox3:amd64.\n",
|
| 67 |
+
"Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 68 |
+
"Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 69 |
+
"Selecting previously unselected package libsox-fmt-alsa:amd64.\n",
|
| 70 |
+
"Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 71 |
+
"Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 72 |
+
"Selecting previously unselected package libwavpack1:amd64.\n",
|
| 73 |
+
"Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n",
|
| 74 |
+
"Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n",
|
| 75 |
+
"Selecting previously unselected package libsox-fmt-base:amd64.\n",
|
| 76 |
+
"Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 77 |
+
"Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 78 |
+
"Selecting previously unselected package sox.\n",
|
| 79 |
+
"Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 80 |
+
"Unpacking sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 81 |
+
"Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 82 |
+
"Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
|
| 83 |
+
"Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 84 |
+
"Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n",
|
| 85 |
+
"Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
|
| 86 |
+
"Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 87 |
+
"Setting up sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 88 |
+
"Processing triggers for man-db (2.10.2-1) ...\n",
|
| 89 |
+
"Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n",
|
| 90 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"\u001b[33mDEPRECATION: git+https://github.com/NVIDIA/NeMo.git#egg=nemo_toolkit[all] contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617\u001b[0m\u001b[33m\n",
|
| 103 |
+
"\u001b[0mCollecting nemo_toolkit[all]\n",
|
| 104 |
+
" Cloning https://github.com/NVIDIA/NeMo.git to /tmp/pip-install-unbwo6dj/nemo-toolkit_de6e0e6e28ce411cafb3187496bb4905\n",
|
| 105 |
+
" Running command git clone --filter=blob:none --quiet https://github.com/NVIDIA/NeMo.git /tmp/pip-install-unbwo6dj/nemo-toolkit_de6e0e6e28ce411cafb3187496bb4905\n",
|
| 106 |
+
" Resolved https://github.com/NVIDIA/NeMo.git to commit 1fa961ba03ab5f8c91b278640e29807079373372\n",
|
| 107 |
+
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
| 108 |
+
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
|
| 109 |
+
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 110 |
+
"Collecting fiddle (from nemo_toolkit[all])\n",
|
| 111 |
+
" Downloading fiddle-0.3.0-py3-none-any.whl (419 kB)\n",
|
| 112 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m419.8/419.8 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 113 |
+
"\u001b[?25hRequirement already satisfied: huggingface-hub>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.23.1)\n",
|
| 114 |
+
"Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.58.1)\n",
|
| 115 |
+
"Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.25.2)\n",
|
| 116 |
+
"Collecting onnx>=1.7.0 (from nemo_toolkit[all])\n",
|
| 117 |
+
" Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
|
| 118 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m34.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 119 |
+
"\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.8.2)\n",
|
| 120 |
+
"Collecting ruamel.yaml (from nemo_toolkit[all])\n",
|
| 121 |
+
" Downloading ruamel.yaml-0.18.6-py3-none-any.whl (117 kB)\n",
|
| 122 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.8/117.8 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 123 |
+
"\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.2.2)\n",
|
| 124 |
+
"Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (67.7.2)\n",
|
| 125 |
+
"Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.15.2)\n",
|
| 126 |
+
"Requirement already satisfied: text-unidecode in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.3)\n",
|
| 127 |
+
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.3.0+cu121)\n",
|
| 128 |
+
"Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (4.66.4)\n",
|
| 129 |
+
"Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.2)\n",
|
| 130 |
+
"Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.14.1)\n",
|
| 131 |
+
"Collecting black~=24.3 (from nemo_toolkit[all])\n",
|
| 132 |
+
" Downloading black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n",
|
| 133 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 134 |
+
"\u001b[?25hCollecting click==8.0.2 (from nemo_toolkit[all])\n",
|
| 135 |
+
" Downloading click-8.0.2-py3-none-any.whl (97 kB)\n",
|
| 136 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 137 |
+
"\u001b[?25hCollecting isort<6.0.0,>5.1.0 (from nemo_toolkit[all])\n",
|
| 138 |
+
" Downloading isort-5.13.2-py3-none-any.whl (92 kB)\n",
|
| 139 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.3/92.3 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 140 |
+
"\u001b[?25hCollecting parameterized (from nemo_toolkit[all])\n",
|
| 141 |
+
" Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)\n",
|
| 142 |
+
"Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.4.4)\n",
|
| 143 |
+
"Collecting pytest-mock (from nemo_toolkit[all])\n",
|
| 144 |
+
" Downloading pytest_mock-3.14.0-py3-none-any.whl (9.9 kB)\n",
|
| 145 |
+
"Collecting pytest-runner (from nemo_toolkit[all])\n",
|
| 146 |
+
" Downloading pytest_runner-6.0.1-py3-none-any.whl (7.2 kB)\n",
|
| 147 |
+
"Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.0.2)\n",
|
| 148 |
+
"Collecting sphinxcontrib-bibtex (from nemo_toolkit[all])\n",
|
| 149 |
+
" Downloading sphinxcontrib_bibtex-2.6.2-py3-none-any.whl (40 kB)\n",
|
| 150 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 151 |
+
"\u001b[?25hCollecting wandb (from nemo_toolkit[all])\n",
|
| 152 |
+
" Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)\n",
|
| 153 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 154 |
+
"\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.2.1)\n",
|
| 155 |
+
"Collecting hydra-core<=1.3.2,>1.3 (from nemo_toolkit[all])\n",
|
| 156 |
+
" Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n",
|
| 157 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 158 |
+
"\u001b[?25hCollecting omegaconf<=2.3 (from nemo_toolkit[all])\n",
|
| 159 |
+
" Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
|
| 160 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 161 |
+
"\u001b[?25hCollecting pytorch-lightning>=2.2.1 (from nemo_toolkit[all])\n",
|
| 162 |
+
" Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)\n",
|
| 163 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m802.3/802.3 kB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 164 |
+
"\u001b[?25hCollecting torchmetrics>=0.11.0 (from nemo_toolkit[all])\n",
|
| 165 |
+
" Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)\n",
|
| 166 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m868.8/868.8 kB\u001b[0m \u001b[31m58.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 167 |
+
"\u001b[?25hCollecting transformers<=4.40.2,>=4.36.0 (from nemo_toolkit[all])\n",
|
| 168 |
+
" Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)\n",
|
| 169 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m76.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 170 |
+
"\u001b[?25hCollecting webdataset>=0.2.86 (from nemo_toolkit[all])\n",
|
| 171 |
+
" Downloading webdataset-0.2.86-py3-none-any.whl (70 kB)\n",
|
| 172 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.4/70.4 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 173 |
+
"\u001b[?25hCollecting datasets (from nemo_toolkit[all])\n",
|
| 174 |
+
" Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n",
|
| 175 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m46.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 176 |
+
"\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.0.0)\n",
|
| 177 |
+
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.0.3)\n",
|
| 178 |
+
"Collecting sacremoses>=0.0.43 (from nemo_toolkit[all])\n",
|
| 179 |
+
" Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)\n",
|
| 180 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.5/897.5 kB\u001b[0m \u001b[31m58.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 181 |
+
"\u001b[?25hRequirement already satisfied: sentencepiece<1.0.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.99)\n",
|
| 182 |
+
"Collecting braceexpand (from nemo_toolkit[all])\n",
|
| 183 |
+
" Downloading braceexpand-0.1.7-py2.py3-none-any.whl (5.9 kB)\n",
|
| 184 |
+
"Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.6.2)\n",
|
| 185 |
+
"Collecting einops (from nemo_toolkit[all])\n",
|
| 186 |
+
" Downloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
|
| 187 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 188 |
+
"\u001b[?25hCollecting g2p-en (from nemo_toolkit[all])\n",
|
| 189 |
+
" Downloading g2p_en-2.1.0-py3-none-any.whl (3.1 MB)\n",
|
| 190 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m78.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 191 |
+
"\u001b[?25hRequirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.7.1)\n",
|
| 192 |
+
"Collecting jiwer (from nemo_toolkit[all])\n",
|
| 193 |
+
" Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)\n",
|
| 194 |
+
"Collecting kaldi-python-io (from nemo_toolkit[all])\n",
|
| 195 |
+
" Downloading kaldi-python-io-1.2.2.tar.gz (8.8 kB)\n",
|
| 196 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 197 |
+
"Collecting kaldiio (from nemo_toolkit[all])\n",
|
| 198 |
+
" Downloading kaldiio-2.18.0-py3-none-any.whl (28 kB)\n",
|
| 199 |
+
"Collecting lhotse>=1.22.0 (from nemo_toolkit[all])\n",
|
| 200 |
+
" Downloading lhotse-1.23.0-py3-none-any.whl (772 kB)\n",
|
| 201 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m772.4/772.4 kB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 202 |
+
"\u001b[?25hRequirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.10.2.post1)\n",
|
| 203 |
+
"Collecting marshmallow (from nemo_toolkit[all])\n",
|
| 204 |
+
" Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
|
| 205 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 206 |
+
"\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.7.1)\n",
|
| 207 |
+
"Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (24.0)\n",
|
| 208 |
+
"Collecting pyannote.core (from nemo_toolkit[all])\n",
|
| 209 |
+
" Downloading pyannote.core-5.0.0-py3-none-any.whl (58 kB)\n",
|
| 210 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 211 |
+
"\u001b[?25hCollecting pyannote.metrics (from nemo_toolkit[all])\n",
|
| 212 |
+
" Downloading pyannote.metrics-3.2.1-py3-none-any.whl (51 kB)\n",
|
| 213 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.4/51.4 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 214 |
+
"\u001b[?25hCollecting pydub (from nemo_toolkit[all])\n",
|
| 215 |
+
" Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
|
| 216 |
+
"Collecting pyloudnorm (from nemo_toolkit[all])\n",
|
| 217 |
+
" Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n",
|
| 218 |
+
"Collecting resampy (from nemo_toolkit[all])\n",
|
| 219 |
+
" Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)\n",
|
| 220 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m82.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 221 |
+
"\u001b[?25hRequirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.11.4)\n",
|
| 222 |
+
"Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.12.1)\n",
|
| 223 |
+
"Collecting sox (from nemo_toolkit[all])\n",
|
| 224 |
+
" Downloading sox-1.5.0.tar.gz (63 kB)\n",
|
| 225 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 226 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 227 |
+
"Collecting texterrors (from nemo_toolkit[all])\n",
|
| 228 |
+
" Downloading texterrors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
|
| 229 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m64.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 230 |
+
"\u001b[?25hCollecting accelerated-scan (from nemo_toolkit[all])\n",
|
| 231 |
+
" Downloading accelerated_scan-0.2.0-py3-none-any.whl (11 kB)\n",
|
| 232 |
+
"Collecting boto3 (from nemo_toolkit[all])\n",
|
| 233 |
+
" Downloading boto3-1.34.113-py3-none-any.whl (139 kB)\n",
|
| 234 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 235 |
+
"\u001b[?25hCollecting causal-conv1d>=1.2.0 (from nemo_toolkit[all])\n",
|
| 236 |
+
" Downloading causal_conv1d-1.2.2.post1.tar.gz (7.2 kB)\n",
|
| 237 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 238 |
+
"Collecting faiss-cpu (from nemo_toolkit[all])\n",
|
| 239 |
+
" Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
|
| 240 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 241 |
+
"\u001b[?25hCollecting fasttext (from nemo_toolkit[all])\n",
|
| 242 |
+
" Downloading fasttext-0.9.2.tar.gz (68 kB)\n",
|
| 243 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.8/68.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 244 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 245 |
+
"Collecting flask-restful (from nemo_toolkit[all])\n",
|
| 246 |
+
" Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl (26 kB)\n",
|
| 247 |
+
"Collecting ftfy (from nemo_toolkit[all])\n",
|
| 248 |
+
" Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)\n",
|
| 249 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.4/54.4 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 250 |
+
"\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.1.0)\n",
|
| 251 |
+
"Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.9.0)\n",
|
| 252 |
+
"Collecting ijson (from nemo_toolkit[all])\n",
|
| 253 |
+
" Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n",
|
| 254 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 255 |
+
"\u001b[?25hRequirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.42.1)\n",
|
| 256 |
+
"Collecting markdown2 (from nemo_toolkit[all])\n",
|
| 257 |
+
" Downloading markdown2-2.4.13-py2.py3-none-any.whl (41 kB)\n",
|
| 258 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 259 |
+
"\u001b[?25hRequirement already satisfied: nltk>=3.6.5 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.8.1)\n",
|
| 260 |
+
"Collecting opencc<1.1.7 (from nemo_toolkit[all])\n",
|
| 261 |
+
" Downloading OpenCC-1.1.6-cp310-cp310-manylinux1_x86_64.whl (778 kB)\n",
|
| 262 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m778.3/778.3 kB\u001b[0m \u001b[31m54.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 263 |
+
"\u001b[?25hCollecting pangu (from nemo_toolkit[all])\n",
|
| 264 |
+
" Downloading pangu-4.0.6.1-py3-none-any.whl (6.4 kB)\n",
|
| 265 |
+
"Collecting rapidfuzz (from nemo_toolkit[all])\n",
|
| 266 |
+
" Downloading rapidfuzz-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
|
| 267 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m83.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 268 |
+
"\u001b[?25hCollecting rouge-score (from nemo_toolkit[all])\n",
|
| 269 |
+
" Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
|
| 270 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 271 |
+
"Collecting sacrebleu (from nemo_toolkit[all])\n",
|
| 272 |
+
" Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)\n",
|
| 273 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.7/106.7 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 274 |
+
"\u001b[?25hCollecting sentence-transformers (from nemo_toolkit[all])\n",
|
| 275 |
+
" Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n",
|
| 276 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 277 |
+
"\u001b[?25hRequirement already satisfied: tensorstore<0.1.46 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.45)\n",
|
| 278 |
+
"Collecting zarr (from nemo_toolkit[all])\n",
|
| 279 |
+
" Downloading zarr-2.18.2-py3-none-any.whl (210 kB)\n",
|
| 280 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.2/210.2 kB\u001b[0m \u001b[31m22.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 281 |
+
"\u001b[?25hCollecting attrdict (from nemo_toolkit[all])\n",
|
| 282 |
+
" Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
|
| 283 |
+
"Collecting kornia (from nemo_toolkit[all])\n",
|
| 284 |
+
" Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)\n",
|
| 285 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m825.4/825.4 kB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 286 |
+
"\u001b[?25hCollecting pypinyin (from nemo_toolkit[all])\n",
|
| 287 |
+
" Downloading pypinyin-0.51.0-py2.py3-none-any.whl (1.4 MB)\n",
|
| 288 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 289 |
+
"\u001b[?25hCollecting pypinyin-dict (from nemo_toolkit[all])\n",
|
| 290 |
+
" Downloading pypinyin_dict-0.8.0-py2.py3-none-any.whl (9.5 MB)\n",
|
| 291 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m88.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 292 |
+
"\u001b[?25hCollecting progress>=1.5 (from nemo_toolkit[all])\n",
|
| 293 |
+
" Downloading progress-1.6.tar.gz (7.8 kB)\n",
|
| 294 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 295 |
+
"Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.9.0)\n",
|
| 296 |
+
"Collecting textdistance>=4.1.5 (from nemo_toolkit[all])\n",
|
| 297 |
+
" Downloading textdistance-4.6.2-py3-none-any.whl (31 kB)\n",
|
| 298 |
+
"Collecting addict (from nemo_toolkit[all])\n",
|
| 299 |
+
" Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
|
| 300 |
+
"Collecting clip (from nemo_toolkit[all])\n",
|
| 301 |
+
" Downloading clip-0.2.0.tar.gz (5.5 kB)\n",
|
| 302 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 303 |
+
"Collecting decord (from nemo_toolkit[all])\n",
|
| 304 |
+
" Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
|
| 305 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m74.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 306 |
+
"\u001b[?25hCollecting diffusers>=0.19.3 (from nemo_toolkit[all])\n",
|
| 307 |
+
" Downloading diffusers-0.28.0-py3-none-any.whl (2.2 MB)\n",
|
| 308 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m77.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 309 |
+
"\u001b[?25hCollecting einops-exts (from nemo_toolkit[all])\n",
|
| 310 |
+
" Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
|
| 311 |
+
"Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.31.6)\n",
|
| 312 |
+
"Collecting nerfacc>=0.5.3 (from nemo_toolkit[all])\n",
|
| 313 |
+
" Downloading nerfacc-0.5.3-py3-none-any.whl (54 kB)\n",
|
| 314 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 315 |
+
"\u001b[?25hCollecting open-clip-torch (from nemo_toolkit[all])\n",
|
| 316 |
+
" Downloading open_clip_torch-2.24.0-py3-none-any.whl (1.5 MB)\n",
|
| 317 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m74.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 318 |
+
"\u001b[?25hCollecting PyMCubes (from nemo_toolkit[all])\n",
|
| 319 |
+
" Downloading PyMCubes-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB)\n",
|
| 320 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.3/274.3 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 321 |
+
"\u001b[?25hCollecting taming-transformers (from nemo_toolkit[all])\n",
|
| 322 |
+
" Downloading taming_transformers-0.0.1-py3-none-any.whl (45 kB)\n",
|
| 323 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.6/45.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 324 |
+
"\u001b[?25hCollecting torchdiffeq (from nemo_toolkit[all])\n",
|
| 325 |
+
" Downloading torchdiffeq-0.2.3-py3-none-any.whl (31 kB)\n",
|
| 326 |
+
"Collecting torchsde (from nemo_toolkit[all])\n",
|
| 327 |
+
" Downloading torchsde-0.2.6-py3-none-any.whl (61 kB)\n",
|
| 328 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 329 |
+
"\u001b[?25hCollecting trimesh (from nemo_toolkit[all])\n",
|
| 330 |
+
" Downloading trimesh-4.4.0-py3-none-any.whl (694 kB)\n",
|
| 331 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m694.6/694.6 kB\u001b[0m \u001b[31m52.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 332 |
+
"\u001b[?25hCollecting nemo-text-processing (from nemo_toolkit[all])\n",
|
| 333 |
+
" Downloading nemo_text_processing-1.0.2-py3-none-any.whl (2.6 MB)\n",
|
| 334 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m79.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 335 |
+
"\u001b[?25hCollecting mypy-extensions>=0.4.3 (from black~=24.3->nemo_toolkit[all])\n",
|
| 336 |
+
" Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
|
| 337 |
+
"Collecting pathspec>=0.9.0 (from black~=24.3->nemo_toolkit[all])\n",
|
| 338 |
+
" Downloading pathspec-0.12.1-py3-none-any.whl (31 kB)\n",
|
| 339 |
+
"Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.2.2)\n",
|
| 340 |
+
"Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (2.0.1)\n",
|
| 341 |
+
"Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.11.0)\n",
|
| 342 |
+
"Collecting ninja (from causal-conv1d>=1.2.0->nemo_toolkit[all])\n",
|
| 343 |
+
" Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
|
| 344 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 345 |
+
"\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (7.1.0)\n",
|
| 346 |
+
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (3.14.0)\n",
|
| 347 |
+
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2023.12.25)\n",
|
| 348 |
+
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2.31.0)\n",
|
| 349 |
+
"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (0.4.3)\n",
|
| 350 |
+
"Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (9.4.0)\n",
|
| 351 |
+
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (2023.6.0)\n",
|
| 352 |
+
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (6.0.1)\n",
|
| 353 |
+
"Collecting antlr4-python3-runtime==4.9.* (from hydra-core<=1.3.2,>1.3->nemo_toolkit[all])\n",
|
| 354 |
+
" Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
|
| 355 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 356 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 357 |
+
"INFO: pip is looking at multiple versions of jiwer to determine which version is compatible with other requirements. This could take a while.\n",
|
| 358 |
+
"Collecting jiwer (from nemo_toolkit[all])\n",
|
| 359 |
+
" Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)\n",
|
| 360 |
+
" Downloading jiwer-3.0.2-py3-none-any.whl (21 kB)\n",
|
| 361 |
+
" Downloading jiwer-3.0.1-py3-none-any.whl (21 kB)\n",
|
| 362 |
+
" Downloading jiwer-3.0.0-py3-none-any.whl (21 kB)\n",
|
| 363 |
+
" Downloading jiwer-2.6.0-py3-none-any.whl (20 kB)\n",
|
| 364 |
+
" Downloading jiwer-2.5.2-py3-none-any.whl (15 kB)\n",
|
| 365 |
+
"Collecting rapidfuzz (from nemo_toolkit[all])\n",
|
| 366 |
+
" Downloading rapidfuzz-2.13.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
|
| 367 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m62.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 368 |
+
"\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from lhotse>=1.22.0->nemo_toolkit[all]) (3.0.1)\n",
|
| 369 |
+
"Collecting cytoolz>=0.10.1 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
|
| 370 |
+
" Downloading cytoolz-0.12.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
|
| 371 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 372 |
+
"\u001b[?25hCollecting intervaltree>=3.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
|
| 373 |
+
" Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
|
| 374 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 375 |
+
"Collecting lilcom>=1.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
|
| 376 |
+
" Downloading lilcom-1.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB)\n",
|
| 377 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.1/87.1 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 378 |
+
"\u001b[?25hRequirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.4.2)\n",
|
| 379 |
+
"Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (4.4.2)\n",
|
| 380 |
+
"Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.8.1)\n",
|
| 381 |
+
"Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.3.7)\n",
|
| 382 |
+
"Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.4)\n",
|
| 383 |
+
"Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.0.8)\n",
|
| 384 |
+
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.2.1)\n",
|
| 385 |
+
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (0.12.1)\n",
|
| 386 |
+
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (4.51.0)\n",
|
| 387 |
+
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.4.5)\n",
|
| 388 |
+
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (3.1.2)\n",
|
| 389 |
+
"Requirement already satisfied: rich>=12 in /usr/local/lib/python3.10/dist-packages (from nerfacc>=0.5.3->nemo_toolkit[all]) (13.7.1)\n",
|
| 390 |
+
"Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->nemo_toolkit[all]) (0.41.1)\n",
|
| 391 |
+
"Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx>=1.7.0->nemo_toolkit[all]) (3.20.3)\n",
|
| 392 |
+
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->nemo_toolkit[all]) (1.16.0)\n",
|
| 393 |
+
"Collecting lightning-utilities>=0.8.0 (from pytorch-lightning>=2.2.1->nemo_toolkit[all])\n",
|
| 394 |
+
" Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)\n",
|
| 395 |
+
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->nemo_toolkit[all]) (3.5.0)\n",
|
| 396 |
+
"Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->nemo_toolkit[all]) (1.16.0)\n",
|
| 397 |
+
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (1.12)\n",
|
| 398 |
+
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.3)\n",
|
| 399 |
+
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.1.4)\n",
|
| 400 |
+
"Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 401 |
+
" Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
|
| 402 |
+
"Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 403 |
+
" Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
|
| 404 |
+
"Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 405 |
+
" Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
|
| 406 |
+
"Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->nemo_toolkit[all])\n",
|
| 407 |
+
" Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
|
| 408 |
+
"Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->nemo_toolkit[all])\n",
|
| 409 |
+
" Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
|
| 410 |
+
"Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->nemo_toolkit[all])\n",
|
| 411 |
+
" Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
|
| 412 |
+
"Collecting nvidia-curand-cu12==10.3.2.106 (from torch->nemo_toolkit[all])\n",
|
| 413 |
+
" Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
|
| 414 |
+
"Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->nemo_toolkit[all])\n",
|
| 415 |
+
" Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
|
| 416 |
+
"Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->nemo_toolkit[all])\n",
|
| 417 |
+
" Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
|
| 418 |
+
"Collecting nvidia-nccl-cu12==2.20.5 (from torch->nemo_toolkit[all])\n",
|
| 419 |
+
" Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
|
| 420 |
+
"Collecting nvidia-nvtx-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 421 |
+
" Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
|
| 422 |
+
"Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (2.3.0)\n",
|
| 423 |
+
"Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->nemo_toolkit[all])\n",
|
| 424 |
+
" Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
|
| 425 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 426 |
+
"\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<=4.40.2,>=4.36.0->nemo_toolkit[all]) (0.19.1)\n",
|
| 427 |
+
"Collecting botocore<1.35.0,>=1.34.113 (from boto3->nemo_toolkit[all])\n",
|
| 428 |
+
" Downloading botocore-1.34.113-py3-none-any.whl (12.3 MB)\n",
|
| 429 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m78.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 430 |
+
"\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->nemo_toolkit[all])\n",
|
| 431 |
+
" Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
|
| 432 |
+
"Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->nemo_toolkit[all])\n",
|
| 433 |
+
" Downloading s3transfer-0.10.1-py3-none-any.whl (82 kB)\n",
|
| 434 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.2/82.2 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 435 |
+
"\u001b[?25hRequirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (14.0.2)\n",
|
| 436 |
+
"Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (0.6)\n",
|
| 437 |
+
"Collecting dill<0.3.9,>=0.3.0 (from datasets->nemo_toolkit[all])\n",
|
| 438 |
+
" Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
|
| 439 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 440 |
+
"\u001b[?25hCollecting xxhash (from datasets->nemo_toolkit[all])\n",
|
| 441 |
+
" Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
|
| 442 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 443 |
+
"\u001b[?25hCollecting multiprocess (from datasets->nemo_toolkit[all])\n",
|
| 444 |
+
" Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
|
| 445 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 446 |
+
"\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (3.9.5)\n",
|
| 447 |
+
"Collecting pybind11>=2.2 (from fasttext->nemo_toolkit[all])\n",
|
| 448 |
+
" Using cached pybind11-2.12.0-py3-none-any.whl (234 kB)\n",
|
| 449 |
+
"Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (1.4.0)\n",
|
| 450 |
+
"Requirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (0.20.3)\n",
|
| 451 |
+
"Collecting libcst (from fiddle->nemo_toolkit[all])\n",
|
| 452 |
+
" Downloading libcst-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
|
| 453 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m65.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 454 |
+
"\u001b[?25hCollecting aniso8601>=0.82 (from flask-restful->nemo_toolkit[all])\n",
|
| 455 |
+
" Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)\n",
|
| 456 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.8/52.8 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 457 |
+
"\u001b[?25hRequirement already satisfied: Flask>=0.8 in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2.2.5)\n",
|
| 458 |
+
"Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2023.4)\n",
|
| 459 |
+
"Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from ftfy->nemo_toolkit[all]) (0.2.13)\n",
|
| 460 |
+
"Collecting distance>=0.1.3 (from g2p-en->nemo_toolkit[all])\n",
|
| 461 |
+
" Downloading Distance-0.1.3.tar.gz (180 kB)\n",
|
| 462 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.3/180.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 463 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 464 |
+
"Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->nemo_toolkit[all]) (2.7.1)\n",
|
| 465 |
+
"Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown->nemo_toolkit[all]) (4.12.3)\n",
|
| 466 |
+
"Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.5.6)\n",
|
| 467 |
+
"Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
|
| 468 |
+
"Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.7.1)\n",
|
| 469 |
+
"Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.6.6)\n",
|
| 470 |
+
"Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (7.34.0)\n",
|
| 471 |
+
"Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.0.10)\n",
|
| 472 |
+
"Collecting kornia-rs>=0.1.0 (from kornia->nemo_toolkit[all])\n",
|
| 473 |
+
" Downloading kornia_rs-0.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
|
| 474 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m89.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 475 |
+
"\u001b[?25hCollecting cdifflib (from nemo-text-processing->nemo_toolkit[all])\n",
|
| 476 |
+
" Downloading cdifflib-1.2.6.tar.gz (11 kB)\n",
|
| 477 |
+
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
| 478 |
+
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
|
| 479 |
+
" Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
| 480 |
+
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 481 |
+
"Collecting pynini==2.1.5 (from nemo-text-processing->nemo_toolkit[all])\n",
|
| 482 |
+
" Downloading pynini-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161.3 MB)\n",
|
| 483 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 MB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 484 |
+
"\u001b[?25hRequirement already satisfied: Cython>=0.29 in /usr/local/lib/python3.10/dist-packages (from pynini==2.1.5->nemo-text-processing->nemo_toolkit[all]) (3.0.10)\n",
|
| 485 |
+
"Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from open-clip-torch->nemo_toolkit[all]) (0.18.0+cu121)\n",
|
| 486 |
+
"Collecting timm (from open-clip-torch->nemo_toolkit[all])\n",
|
| 487 |
+
" Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)\n",
|
| 488 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m85.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 489 |
+
"\u001b[?25hRequirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->nemo_toolkit[all]) (2024.1)\n",
|
| 490 |
+
"Requirement already satisfied: sortedcontainers>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pyannote.core->nemo_toolkit[all]) (2.4.0)\n",
|
| 491 |
+
"Collecting pyannote.database>=4.0.1 (from pyannote.metrics->nemo_toolkit[all])\n",
|
| 492 |
+
" Downloading pyannote.database-5.1.0-py3-none-any.whl (48 kB)\n",
|
| 493 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.1/48.1 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 494 |
+
"\u001b[?25hCollecting docopt>=0.6.2 (from pyannote.metrics->nemo_toolkit[all])\n",
|
| 495 |
+
" Downloading docopt-0.6.2.tar.gz (25 kB)\n",
|
| 496 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 497 |
+
"Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.10/dist-packages (from pyloudnorm->nemo_toolkit[all]) (0.18.3)\n",
|
| 498 |
+
"Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (2.0.0)\n",
|
| 499 |
+
"Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.5.0)\n",
|
| 500 |
+
"Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.2.1)\n",
|
| 501 |
+
"Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->nemo_toolkit[all])\n",
|
| 502 |
+
" Downloading ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (526 kB)\n",
|
| 503 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 504 |
+
"\u001b[?25hCollecting portalocker (from sacrebleu->nemo_toolkit[all])\n",
|
| 505 |
+
" Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
|
| 506 |
+
"Collecting colorama (from sacrebleu->nemo_toolkit[all])\n",
|
| 507 |
+
" Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
|
| 508 |
+
"Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu->nemo_toolkit[all]) (4.9.4)\n",
|
| 509 |
+
"Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.8)\n",
|
| 510 |
+
"Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.6)\n",
|
| 511 |
+
"Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.1)\n",
|
| 512 |
+
"Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.0.5)\n",
|
| 513 |
+
"Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.1.10)\n",
|
| 514 |
+
"Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.7)\n",
|
| 515 |
+
"Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.16.1)\n",
|
| 516 |
+
"Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.18.1)\n",
|
| 517 |
+
"Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.2.0)\n",
|
| 518 |
+
"Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.15.0)\n",
|
| 519 |
+
"Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.7.16)\n",
|
| 520 |
+
"Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.4.1)\n",
|
| 521 |
+
"Collecting docutils<0.19,>=0.14 (from sphinx->nemo_toolkit[all])\n",
|
| 522 |
+
" Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)\n",
|
| 523 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m575.5/575.5 kB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 524 |
+
"\u001b[?25hCollecting pybtex>=0.24 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
|
| 525 |
+
" Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)\n",
|
| 526 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m561.4/561.4 kB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 527 |
+
"\u001b[?25hCollecting pybtex-docutils>=1.0.0 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
|
| 528 |
+
" Downloading pybtex_docutils-1.0.3-py3-none-any.whl (6.4 kB)\n",
|
| 529 |
+
"Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.64.0)\n",
|
| 530 |
+
"Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (2.27.0)\n",
|
| 531 |
+
"Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.2.0)\n",
|
| 532 |
+
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.6)\n",
|
| 533 |
+
"Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (0.7.2)\n",
|
| 534 |
+
"Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.0.3)\n",
|
| 535 |
+
"Collecting plac (from texterrors->nemo_toolkit[all])\n",
|
| 536 |
+
" Downloading plac-1.4.3-py2.py3-none-any.whl (22 kB)\n",
|
| 537 |
+
"Collecting loguru (from texterrors->nemo_toolkit[all])\n",
|
| 538 |
+
" Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n",
|
| 539 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 540 |
+
"\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from texterrors->nemo_toolkit[all]) (2.4.0)\n",
|
| 541 |
+
"Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
|
| 542 |
+
" Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
|
| 543 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 544 |
+
"\u001b[?25hCollecting trampoline>=0.1.2 (from torchsde->nemo_toolkit[all])\n",
|
| 545 |
+
" Downloading trampoline-0.1.2-py3-none-any.whl (5.2 kB)\n",
|
| 546 |
+
"Collecting docker-pycreds>=0.4.0 (from wandb->nemo_toolkit[all])\n",
|
| 547 |
+
" Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
|
| 548 |
+
"Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->nemo_toolkit[all])\n",
|
| 549 |
+
" Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
|
| 550 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 551 |
+
"\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->nemo_toolkit[all]) (5.9.5)\n",
|
| 552 |
+
"Collecting sentry-sdk>=1.0.0 (from wandb->nemo_toolkit[all])\n",
|
| 553 |
+
" Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)\n",
|
| 554 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 555 |
+
"\u001b[?25hCollecting setproctitle (from wandb->nemo_toolkit[all])\n",
|
| 556 |
+
" Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
|
| 557 |
+
"Collecting asciitree (from zarr->nemo_toolkit[all])\n",
|
| 558 |
+
" Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n",
|
| 559 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 560 |
+
"Collecting numcodecs>=0.10.0 (from zarr->nemo_toolkit[all])\n",
|
| 561 |
+
" Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n",
|
| 562 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m63.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 563 |
+
"\u001b[?25hCollecting fasteners (from zarr->nemo_toolkit[all])\n",
|
| 564 |
+
" Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n",
|
| 565 |
+
"Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.113->boto3->nemo_toolkit[all]) (2.0.7)\n",
|
| 566 |
+
"Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->nemo_toolkit[all]) (2.22)\n",
|
| 567 |
+
"Requirement already satisfied: toolz>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from cytoolz>=0.10.1->lhotse>=1.22.0->nemo_toolkit[all]) (0.12.1)\n",
|
| 568 |
+
"Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask>=0.8->flask-restful->nemo_toolkit[all]) (2.2.0)\n",
|
| 569 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.3.1)\n",
|
| 570 |
+
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (23.2.0)\n",
|
| 571 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.4.1)\n",
|
| 572 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (6.0.5)\n",
|
| 573 |
+
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.9.4)\n",
|
| 574 |
+
"Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (4.0.3)\n",
|
| 575 |
+
"Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
|
| 576 |
+
" Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
|
| 577 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 578 |
+
"\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (5.3.3)\n",
|
| 579 |
+
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.4.0)\n",
|
| 580 |
+
"Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (4.9)\n",
|
| 581 |
+
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (1.3.1)\n",
|
| 582 |
+
"Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.1.12)\n",
|
| 583 |
+
"Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.3.3)\n",
|
| 584 |
+
"Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all])\n",
|
| 585 |
+
" Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n",
|
| 586 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m73.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 587 |
+
"\u001b[?25hRequirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.5)\n",
|
| 588 |
+
"Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (3.0.43)\n",
|
| 589 |
+
"Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
|
| 590 |
+
"Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.1.7)\n",
|
| 591 |
+
"Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (4.9.0)\n",
|
| 592 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->nemo_toolkit[all]) (2.1.5)\n",
|
| 593 |
+
"Collecting typer>=0.12.1 (from pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
|
| 594 |
+
" Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
|
| 595 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 596 |
+
"\u001b[?25hCollecting latexcodec>=1.0.4 (from pybtex>=0.24->sphinxcontrib-bibtex->nemo_toolkit[all])\n",
|
| 597 |
+
" Downloading latexcodec-3.0.0-py3-none-any.whl (18 kB)\n",
|
| 598 |
+
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (0.7.0)\n",
|
| 599 |
+
"Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (2.18.2)\n",
|
| 600 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.3.2)\n",
|
| 601 |
+
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.7)\n",
|
| 602 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (2024.2.2)\n",
|
| 603 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (3.0.0)\n",
|
| 604 |
+
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->nemo_toolkit[all]) (1.3.0)\n",
|
| 605 |
+
"Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.5)\n",
|
| 606 |
+
"Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown->nemo_toolkit[all]) (2.5)\n",
|
| 607 |
+
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers>=0.19.3->nemo_toolkit[all]) (3.18.2)\n",
|
| 608 |
+
"INFO: pip is looking at multiple versions of levenshtein to determine which version is compatible with other requirements. This could take a while.\n",
|
| 609 |
+
"Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
|
| 610 |
+
" Downloading Levenshtein-0.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
|
| 611 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 612 |
+
"\u001b[?25h Downloading Levenshtein-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
|
| 613 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 614 |
+
"\u001b[?25h Downloading Levenshtein-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB)\n",
|
| 615 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.4/169.4 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 616 |
+
"\u001b[?25h Downloading Levenshtein-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)\n",
|
| 617 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.9/172.9 kB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 618 |
+
"\u001b[?25hRequirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (1.7.1)\n",
|
| 619 |
+
"Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
|
| 620 |
+
" Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
|
| 621 |
+
"Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
|
| 622 |
+
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (0.1.2)\n",
|
| 623 |
+
"Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (24.0.1)\n",
|
| 624 |
+
"Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (23.1.0)\n",
|
| 625 |
+
"Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.7.2)\n",
|
| 626 |
+
"Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.10.4)\n",
|
| 627 |
+
"Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.4)\n",
|
| 628 |
+
"Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.6.0)\n",
|
| 629 |
+
"Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.3)\n",
|
| 630 |
+
"Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
|
| 631 |
+
"Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.20.0)\n",
|
| 632 |
+
"Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.0.0)\n",
|
| 633 |
+
"Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.0)\n",
|
| 634 |
+
"Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.6.0)\n",
|
| 635 |
+
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (3.2.2)\n",
|
| 636 |
+
"Collecting shellingham>=1.3.0 (from typer>=0.12.1->pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
|
| 637 |
+
" Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
|
| 638 |
+
"Requirement already satisfied: jupyter-server>=1.8 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.24.0)\n",
|
| 639 |
+
"Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.2.4)\n",
|
| 640 |
+
"Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.1.0)\n",
|
| 641 |
+
"Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.7.1)\n",
|
| 642 |
+
"Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.4)\n",
|
| 643 |
+
"Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.3.0)\n",
|
| 644 |
+
"Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
|
| 645 |
+
"Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.10.0)\n",
|
| 646 |
+
"Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.5.1)\n",
|
| 647 |
+
"Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.0)\n",
|
| 648 |
+
"Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2.19.1)\n",
|
| 649 |
+
"Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (4.19.2)\n",
|
| 650 |
+
"Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (21.2.0)\n",
|
| 651 |
+
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2023.12.1)\n",
|
| 652 |
+
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.35.1)\n",
|
| 653 |
+
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
|
| 654 |
+
"Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (3.7.1)\n",
|
| 655 |
+
"Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.0)\n",
|
| 656 |
+
"Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.5.1)\n",
|
| 657 |
+
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.1)\n",
|
| 658 |
+
"Building wheels for collected packages: causal-conv1d, antlr4-python3-runtime, progress, clip, fasttext, kaldi-python-io, nemo_toolkit, rouge-score, sox, distance, docopt, intervaltree, asciitree, cdifflib\n",
|
| 659 |
+
" Building wheel for causal-conv1d (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 660 |
+
" Created wheel for causal-conv1d: filename=causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl size=103643300 sha256=2bba8823ae89bd79c2d067978e0e533fab8298f69855bfc5d199828b278cf66c\n",
|
| 661 |
+
" Stored in directory: /root/.cache/pip/wheels/22/a7/db/0c9482dec3707ad23181b0eb2da40e4b8f26aaed49752fc49f\n",
|
| 662 |
+
" Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 663 |
+
" Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=d26518c128f80048ec70721551489517353867c7668a281f27cf1a20b9acd114\n",
|
| 664 |
+
" Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
|
| 665 |
+
" Building wheel for progress (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 666 |
+
" Created wheel for progress: filename=progress-1.6-py3-none-any.whl size=9614 sha256=87c634c79d4e56e317499682766011b5d0e28953e43f6a3754957d0f4fd3633a\n",
|
| 667 |
+
" Stored in directory: /root/.cache/pip/wheels/a2/68/5f/c339b20a41659d856c93ccdce6a33095493eb82c3964aac5a1\n",
|
| 668 |
+
" Building wheel for clip (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 669 |
+
" Created wheel for clip: filename=clip-0.2.0-py3-none-any.whl size=6989 sha256=3e9ac01ba0eff273ea70feaf80d486a07683956515496b6dfeeafe81c9caae24\n",
|
| 670 |
+
" Stored in directory: /root/.cache/pip/wheels/7f/5c/e6/2c0fdb453a3569188864b17e9676bea8b3b7e160c037117869\n",
|
| 671 |
+
" Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 672 |
+
" Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4227140 sha256=708a73871f9ae384ea66b706bb0b73b6c624f23ce0d19882b6711b31abed8091\n",
|
| 673 |
+
" Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394\n",
|
| 674 |
+
" Building wheel for kaldi-python-io (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 675 |
+
" Created wheel for kaldi-python-io: filename=kaldi_python_io-1.2.2-py3-none-any.whl size=8949 sha256=5399346b043c1ae3d7431729bbd34a5206a1bbe26c41b5ba69d2b45879740d55\n",
|
| 676 |
+
" Stored in directory: /root/.cache/pip/wheels/b7/23/5f/49d3a826be576faf61d84e8028e1914bb36a5586ee2613b087\n",
|
| 677 |
+
" Building wheel for nemo_toolkit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 678 |
+
" Created wheel for nemo_toolkit: filename=nemo_toolkit-2.0.0rc1-py3-none-any.whl size=3709778 sha256=3eb9e4278cef98370e97bf7cc0f009cdebbdaaf8fac7a6584289fdd8abfbd8c8\n",
|
| 679 |
+
" Stored in directory: /tmp/pip-ephem-wheel-cache-sdhc6zr1/wheels/c3/4e/45/ab3d29aa73df619f27b371cacf809d5330a18f794879163c1b\n",
|
| 680 |
+
" Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 681 |
+
" Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=78515a9f3e94b274e69e68c059af462dc7cc1b10b51c1b6d419704ea6b4cffe5\n",
|
| 682 |
+
" Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
|
| 683 |
+
" Building wheel for sox (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 684 |
+
" Created wheel for sox: filename=sox-1.5.0-py3-none-any.whl size=40038 sha256=1c48c5456291b6b4859918dc81caa53229a5114b68c2772f1a5518f6c6a21254\n",
|
| 685 |
+
" Stored in directory: /root/.cache/pip/wheels/74/e7/7b/8033be3ec5e4994595d01269fc9657c8fd83a0dcbf8536666a\n",
|
| 686 |
+
" Building wheel for distance (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 687 |
+
" Created wheel for distance: filename=Distance-0.1.3-py3-none-any.whl size=16258 sha256=ade70730449fb839934e857bdcddc6de204e5eaab05db259da2f85be3fc099d0\n",
|
| 688 |
+
" Stored in directory: /root/.cache/pip/wheels/e8/bb/de/f71bf63559ea9a921059a5405806f7ff6ed612a9231c4a9309\n",
|
| 689 |
+
" Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 690 |
+
" Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=1aaae6b0427604326f67708418c3010e0f969b8a82ae512f79307f3978f09f52\n",
|
| 691 |
+
" Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n",
|
| 692 |
+
" Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 693 |
+
" Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26096 sha256=f306547725eb9ea7e52d4b78e8d49734164aa4dc43faee9a74ebd91087a42b68\n",
|
| 694 |
+
" Stored in directory: /root/.cache/pip/wheels/fa/80/8c/43488a924a046b733b64de3fac99252674c892a4c3801c0a61\n",
|
| 695 |
+
" Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 696 |
+
" Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=0251fcc8a18991f12d3209f3acf225199c31dee41236f40b77b69fde95038da9\n",
|
| 697 |
+
" Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n",
|
| 698 |
+
" Building wheel for cdifflib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 699 |
+
" Created wheel for cdifflib: filename=cdifflib-1.2.6-cp310-cp310-linux_x86_64.whl size=27681 sha256=7c7f9bf50579f19875573df405249c5c962045cdeb20eea6469b5c2d2defb0ce\n",
|
| 700 |
+
" Stored in directory: /root/.cache/pip/wheels/87/a7/fd/8061e24ed08689045cb6d1ca303768dc463b20a5a338174841\n",
|
| 701 |
+
"Successfully built causal-conv1d antlr4-python3-runtime progress clip fasttext kaldi-python-io nemo_toolkit rouge-score sox distance docopt intervaltree asciitree cdifflib\n",
|
| 702 |
+
"Installing collected packages: trampoline, pydub, progress, plac, pangu, opencc, ninja, ijson, docopt, distance, clip, braceexpand, asciitree, antlr4-python3-runtime, aniso8601, addict, xxhash, webdataset, trimesh, textdistance, sox, smmap, shellingham, setproctitle, sentry-sdk, ruamel.yaml.clib, rapidfuzz, pytest-runner, pypinyin, pynini, pybind11, portalocker, pathspec, parameterized, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numcodecs, mypy-extensions, marshmallow, markdown2, loguru, lilcom, lightning-utilities, libcst, latexcodec, kornia-rs, kaldiio, kaldi-python-io, jmespath, jedi, isort, intervaltree, ftfy, fasteners, faiss-cpu, einops, docutils, docker-pycreds, dill, decord, cytoolz, colorama, click, cdifflib, attrdict, zarr, sacremoses, sacrebleu, ruamel.yaml, resampy, pytest-mock, pypinyin-dict, PyMCubes, pyloudnorm, pybtex, pyannote.core, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, Levenshtein, jiwer, hydra-core, gitdb, fiddle, fasttext, einops-exts, botocore, black, typer, texterrors, s3transfer, rouge-score, pybtex-docutils, nvidia-cusolver-cu12, lhotse, gitpython, flask-restful, diffusers, wandb, transformers, sphinxcontrib-bibtex, pyannote.database, g2p-en, datasets, boto3, torchsde, torchmetrics, torchdiffeq, sentence-transformers, pyannote.metrics, nerfacc, nemo_toolkit, nemo-text-processing, kornia, causal-conv1d, accelerated-scan, timm, pytorch-lightning, taming-transformers, open-clip-torch\n",
|
| 703 |
+
" Attempting uninstall: docutils\n",
|
| 704 |
+
" Found existing installation: docutils 0.18.1\n",
|
| 705 |
+
" Uninstalling docutils-0.18.1:\n",
|
| 706 |
+
" Successfully uninstalled docutils-0.18.1\n",
|
| 707 |
+
" Attempting uninstall: click\n",
|
| 708 |
+
" Found existing installation: click 8.1.7\n",
|
| 709 |
+
" Uninstalling click-8.1.7:\n",
|
| 710 |
+
" Successfully uninstalled click-8.1.7\n",
|
| 711 |
+
" Attempting uninstall: typer\n",
|
| 712 |
+
" Found existing installation: typer 0.9.4\n",
|
| 713 |
+
" Uninstalling typer-0.9.4:\n",
|
| 714 |
+
" Successfully uninstalled typer-0.9.4\n",
|
| 715 |
+
" Attempting uninstall: transformers\n",
|
| 716 |
+
" Found existing installation: transformers 4.41.0\n",
|
| 717 |
+
" Uninstalling transformers-4.41.0:\n",
|
| 718 |
+
" Successfully uninstalled transformers-4.41.0\n",
|
| 719 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
| 720 |
+
"spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
|
| 721 |
+
"weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
|
| 722 |
+
"\u001b[0mSuccessfully installed Levenshtein-0.22.0 PyMCubes-0.1.4 accelerated-scan-0.2.0 addict-2.4.0 aniso8601-9.0.1 antlr4-python3-runtime-4.9.3 asciitree-0.3.3 attrdict-2.0.1 black-24.4.2 boto3-1.34.113 botocore-1.34.113 braceexpand-0.1.7 causal-conv1d-1.2.2.post1 cdifflib-1.2.6 click-8.0.2 clip-0.2.0 colorama-0.4.6 cytoolz-0.12.3 datasets-2.19.1 decord-0.6.0 diffusers-0.28.0 dill-0.3.8 distance-0.1.3 docker-pycreds-0.4.0 docopt-0.6.2 docutils-0.17.1 einops-0.8.0 einops-exts-0.0.4 faiss-cpu-1.8.0 fasteners-0.19 fasttext-0.9.2 fiddle-0.3.0 flask-restful-0.3.10 ftfy-6.2.0 g2p-en-2.1.0 gitdb-4.0.11 gitpython-3.1.43 hydra-core-1.3.2 ijson-3.2.3 intervaltree-3.1.0 isort-5.13.2 jedi-0.19.1 jiwer-2.5.2 jmespath-1.0.1 kaldi-python-io-1.2.2 kaldiio-2.18.0 kornia-0.7.2 kornia-rs-0.1.3 latexcodec-3.0.0 lhotse-1.23.0 libcst-1.4.0 lightning-utilities-0.11.2 lilcom-1.7 loguru-0.7.2 markdown2-2.4.13 marshmallow-3.21.2 multiprocess-0.70.16 mypy-extensions-1.0.0 nemo-text-processing-1.0.2 nemo_toolkit-2.0.0rc1 nerfacc-0.5.3 ninja-1.11.1.1 numcodecs-0.12.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 open-clip-torch-2.24.0 opencc-1.1.6 pangu-4.0.6.1 parameterized-0.9.0 pathspec-0.12.1 plac-1.4.3 portalocker-2.8.2 progress-1.6 pyannote.core-5.0.0 pyannote.database-5.1.0 pyannote.metrics-3.2.1 pybind11-2.12.0 pybtex-0.24.0 pybtex-docutils-1.0.3 pydub-0.25.1 pyloudnorm-0.1.1 pynini-2.1.5 pypinyin-0.51.0 pypinyin-dict-0.8.0 pytest-mock-3.14.0 pytest-runner-6.0.1 pytorch-lightning-2.2.5 rapidfuzz-2.13.7 resampy-0.4.3 rouge-score-0.1.2 ruamel.yaml-0.18.6 ruamel.yaml.clib-0.2.8 s3transfer-0.10.1 sacrebleu-2.4.2 sacremoses-0.1.1 
sentence-transformers-2.7.0 sentry-sdk-2.3.1 setproctitle-1.3.3 shellingham-1.5.4 smmap-5.0.1 sox-1.5.0 sphinxcontrib-bibtex-2.6.2 taming-transformers-0.0.1 textdistance-4.6.2 texterrors-0.4.4 timm-1.0.3 torchdiffeq-0.2.3 torchmetrics-1.4.0.post0 torchsde-0.2.6 trampoline-0.1.2 transformers-4.40.2 trimesh-4.4.0 typer-0.12.3 wandb-0.17.0 webdataset-0.2.86 xxhash-3.4.1 zarr-2.18.2\n"
|
| 723 |
+
]
|
| 724 |
+
}
|
| 725 |
+
],
|
| 726 |
+
"source": [
|
| 727 |
+
"!pip install wget\n",
|
| 728 |
+
"!apt-get install -y sox libsndfile1 ffmpeg\n",
|
| 729 |
+
"!pip install \"matplotlib>=3.3.2\"\n",
|
| 730 |
+
"\n",
|
| 731 |
+
"!python -m pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
|
| 732 |
+
]
|
| 733 |
+
},
|
| 734 |
+
{
|
| 735 |
+
"cell_type": "code",
|
| 736 |
+
"execution_count": null,
|
| 737 |
+
"metadata": {
|
| 738 |
+
"id": "_Utv8kLRW9Js"
|
| 739 |
+
},
|
| 740 |
+
"outputs": [],
|
| 741 |
+
"source": [
|
| 742 |
+
"from typing import List, Union\n",
|
| 743 |
+
"\n",
|
| 744 |
+
"import hydra\n",
|
| 745 |
+
"import soundfile as sf\n",
|
| 746 |
+
"import torch\n",
|
| 747 |
+
"from omegaconf import DictConfig, ListConfig, OmegaConf"
|
| 748 |
+
]
|
| 749 |
+
},
|
| 750 |
+
{
|
| 751 |
+
"cell_type": "markdown",
|
| 752 |
+
"metadata": {
|
| 753 |
+
"id": "ZLslfbEfXQIE"
|
| 754 |
+
},
|
| 755 |
+
"source": [
|
| 756 |
+
"### Model for emotion classification"
|
| 757 |
+
]
|
| 758 |
+
},
|
| 759 |
+
{
|
| 760 |
+
"cell_type": "code",
|
| 761 |
+
"execution_count": null,
|
| 762 |
+
"metadata": {
|
| 763 |
+
"id": "paEKSFFVXPqC"
|
| 764 |
+
},
|
| 765 |
+
"outputs": [],
|
| 766 |
+
"source": [
|
| 767 |
+
"class SpecScaler(torch.nn.Module):  # takes the log of the input after clamping it to [1e-9, 1e9]\n",
|
| 768 |
+
"    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
|
| 769 |
+
"        return torch.log(x.clamp(1e-9, 1e9))  # out-of-place clamp: the caller's tensor is not modified\n",
|
| 770 |
+
"\n",
|
| 771 |
+
"\n",
|
| 772 |
+
"class GigaAMEmo(torch.nn.Module):  # feature extractor -> encoder -> average pooling -> linear head, all built from conf\n",
|
| 773 |
+
"    def __init__(self, conf: Union[DictConfig, ListConfig]):\n",
|
| 774 |
+
"        super().__init__()\n",
|
| 775 |
+
"        self.id2name = conf.id2name\n",
|
| 776 |
+
"        self.feature_extractor = hydra.utils.instantiate(conf.feature_extractor)\n",
|
| 777 |
+
"        self.conformer = hydra.utils.instantiate(conf.encoder)\n",
|
| 778 |
+
"        self.linear_head = hydra.utils.instantiate(conf.classification_head)\n",
|
| 779 |
+
"\n",
|
| 780 |
+
"    def forward(self, features, features_length=None):\n",
|
| 781 |
+
"        if features.dim() == 2:  # a single 2-D input gets a leading batch axis\n",
|
| 782 |
+
"            features = features.unsqueeze(0)\n",
|
| 783 |
+
"        if features_length is None:  # identity check: truth-testing a multi-element tensor raises\n",
|
| 784 |
+
"            features_length = torch.ones(features.shape[0]) * features.shape[-1]\n",
|
| 785 |
+
"        features_length = features_length.to(features.device)  # lengths must live on the same device as features\n",
|
| 786 |
+
"        encoded, _ = self.conformer(audio_signal=features, length=features_length)\n",
|
| 787 |
+
"        encoded_pooled = torch.nn.functional.avg_pool1d(\n",
|
| 788 |
+
"            encoded, kernel_size=encoded.shape[-1]\n",
|
| 789 |
+
"        ).squeeze(-1)\n",
|
| 790 |
+
"\n",
|
| 791 |
+
"        logits = self.linear_head(encoded_pooled)\n",
|
| 792 |
+
"        return logits\n",
|
| 793 |
+
"\n",
|
| 794 |
+
"    def get_probs(self, audio_path: str) -> List[List[float]]:\n",
|
| 795 |
+
"        audio_signal, _ = sf.read(audio_path, dtype=\"float32\")\n",
|
| 796 |
+
"        features = self.feature_extractor(\n",
|
| 797 |
+
"            torch.tensor(audio_signal).float().to(next(self.parameters()).device)\n",
|
| 798 |
+
"        )\n",
|
| 799 |
+
"        logits = self.forward(features)\n",
|
| 800 |
+
"        probs = torch.nn.functional.softmax(logits, dim=-1).detach().tolist()  # explicit dim; the implicit form is deprecated\n",
|
| 801 |
+
"        return probs"
|
| 802 |
+
]
|
| 803 |
+
},
|
| 804 |
+
{
|
| 805 |
+
"cell_type": "markdown",
|
| 806 |
+
"metadata": {
|
| 807 |
+
"id": "7UFpN0Ghc244"
|
| 808 |
+
},
|
| 809 |
+
"source": [
|
| 810 |
+
"### Downloading config, weights and audio example"
|
| 811 |
+
]
|
| 812 |
+
},
|
| 813 |
+
{
|
| 814 |
+
"cell_type": "code",
|
| 815 |
+
"execution_count": null,
|
| 816 |
+
"metadata": {
|
| 817 |
+
"colab": {
|
| 818 |
+
"base_uri": "https://localhost:8080/"
|
| 819 |
+
},
|
| 820 |
+
"id": "jFZJGISjcmHW",
|
| 821 |
+
"outputId": "74a2a71e-2dba-4551-c2cb-737eaa35bfa4"
|
| 822 |
+
},
|
| 823 |
+
"outputs": [
|
| 824 |
+
{
|
| 825 |
+
"name": "stdout",
|
| 826 |
+
"output_type": "stream",
|
| 827 |
+
"text": [
|
| 828 |
+
"--2024-05-28 07:10:07-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt\n",
|
| 829 |
+
"Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
|
| 830 |
+
"Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
|
| 831 |
+
"HTTP request sent, awaiting response... 200 OK\n",
|
| 832 |
+
"Length: 968409626 (924M) [application/octet-stream]\n",
|
| 833 |
+
"Saving to: ‘emo_model_weights.ckpt’\n",
|
| 834 |
+
"\n",
|
| 835 |
+
"emo_model_weights.c 100%[===================>] 923.55M 7.48MB/s in 1m 45s \n",
|
| 836 |
+
"\n",
|
| 837 |
+
"2024-05-28 07:11:53 (8.82 MB/s) - ‘emo_model_weights.ckpt’ saved [968409626/968409626]\n",
|
| 838 |
+
"\n",
|
| 839 |
+
"--2024-05-28 07:11:54-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml\n",
|
| 840 |
+
"Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
|
| 841 |
+
"Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
|
| 842 |
+
"HTTP request sent, awaiting response... 200 OK\n",
|
| 843 |
+
"Length: 765 [application/octet-stream]\n",
|
| 844 |
+
"Saving to: ‘emo_model_config.yaml’\n",
|
| 845 |
+
"\n",
|
| 846 |
+
"emo_model_config.ya 100%[===================>] 765 --.-KB/s in 0s \n",
|
| 847 |
+
"\n",
|
| 848 |
+
"2024-05-28 07:11:54 (252 MB/s) - ‘emo_model_config.yaml’ saved [765/765]\n",
|
| 849 |
+
"\n",
|
| 850 |
+
"--2024-05-28 07:11:54-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav\n",
|
| 851 |
+
"Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
|
| 852 |
+
"Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
|
| 853 |
+
"HTTP request sent, awaiting response... 200 OK\n",
|
| 854 |
+
"Length: 361324 (353K) [application/octet-stream]\n",
|
| 855 |
+
"Saving to: ‘example.wav’\n",
|
| 856 |
+
"\n",
|
| 857 |
+
"example.wav 100%[===================>] 352.86K 715KB/s in 0.5s \n",
|
| 858 |
+
"\n",
|
| 859 |
+
"2024-05-28 07:11:56 (715 KB/s) - ‘example.wav’ saved [361324/361324]\n",
|
| 860 |
+
"\n"
|
| 861 |
+
]
|
| 862 |
+
}
|
| 863 |
+
],
|
| 864 |
+
"source": [
|
| 865 |
+
"import locale\n",
|
| 866 |
+
"\n",
|
| 867 |
+
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
|
| 868 |
+
"\n",
|
| 869 |
+
"# Loading weights, config and example wav for Emo-model\n",
|
| 870 |
+
"!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt\n",
|
| 871 |
+
"!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml\n",
|
| 872 |
+
"!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav"
|
| 873 |
+
]
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"cell_type": "markdown",
|
| 877 |
+
"metadata": {
|
| 878 |
+
"id": "NZ6-O2M0fxDY"
|
| 879 |
+
},
|
| 880 |
+
"source": [
|
| 881 |
+
"### Model instantiation and inference"
|
| 882 |
+
]
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"cell_type": "code",
|
| 886 |
+
"execution_count": null,
|
| 887 |
+
"metadata": {
|
| 888 |
+
"colab": {
|
| 889 |
+
"base_uri": "https://localhost:8080/"
|
| 890 |
+
},
|
| 891 |
+
"id": "plXt8297d5km",
|
| 892 |
+
"outputId": "537acb90-f6a1-4a73-ea66-0d3fe6fd9a3a"
|
| 893 |
+
},
|
| 894 |
+
"outputs": [
|
| 895 |
+
{
|
| 896 |
+
"name": "stderr",
|
| 897 |
+
"output_type": "stream",
|
| 898 |
+
"text": [
|
| 899 |
+
"[NeMo W 2024-05-28 07:20:12 nemo_logging:349] <ipython-input-3-06a5deda234a>:32: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
|
| 900 |
+
" probs = torch.nn.functional.softmax(logits).detach().tolist()\n",
|
| 901 |
+
" \n"
|
| 902 |
+
]
|
| 903 |
+
},
|
| 904 |
+
{
|
| 905 |
+
"name": "stdout",
|
| 906 |
+
"output_type": "stream",
|
| 907 |
+
"text": [
|
| 908 |
+
"angry: 0.000, sad: 0.002, neutral: 0.923, positive: 0.074\n"
|
| 909 |
+
]
|
| 910 |
+
}
|
| 911 |
+
],
|
| 912 |
+
"source": [
|
| 913 |
+
"model_config = \"emo_model_config.yaml\"\n",
|
| 914 |
+
"model_weights = \"emo_model_weights.ckpt\"\n",
|
| 915 |
+
"audio_path = \"example.wav\"\n",
|
| 916 |
+
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
|
| 917 |
+
"\n",
|
| 918 |
+
"conf = OmegaConf.load(model_config)\n",
|
| 919 |
+
"model = GigaAMEmo(conf)\n",
|
| 920 |
+
"ckpt = torch.load(model_weights, map_location=\"cpu\")\n",
|
| 921 |
+
"model.load_state_dict(ckpt, strict=False)\n",
|
| 922 |
+
"model = model.to(device)\n",
|
| 923 |
+
"model.eval()\n",
|
| 924 |
+
"with torch.no_grad():\n",
|
| 925 |
+
" probs = model.get_probs(audio_path)[0]\n",
|
| 926 |
+
"print(\", \".join([f\"{model.id2name[i]}: {p:.3f}\" for i, p in enumerate(probs)]))"
|
| 927 |
+
]
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"cell_type": "code",
|
| 931 |
+
"execution_count": null,
|
| 932 |
+
"metadata": {
|
| 933 |
+
"id": "nmFvC_GfkasV"
|
| 934 |
+
},
|
| 935 |
+
"outputs": [],
|
| 936 |
+
"source": []
|
| 937 |
+
}
|
| 938 |
+
],
|
| 939 |
+
"metadata": {
|
| 940 |
+
"accelerator": "GPU",
|
| 941 |
+
"colab": {
|
| 942 |
+
"gpuType": "T4",
|
| 943 |
+
"provenance": []
|
| 944 |
+
},
|
| 945 |
+
"kernelspec": {
|
| 946 |
+
"display_name": "Python 3",
|
| 947 |
+
"name": "python3"
|
| 948 |
+
},
|
| 949 |
+
"language_info": {
|
| 950 |
+
"name": "python"
|
| 951 |
+
}
|
| 952 |
+
},
|
| 953 |
+
"nbformat": 4,
|
| 954 |
+
"nbformat_minor": 0
|
| 955 |
+
}
|
Examples/notebooks/GigaAM_Model_Usage_Example.ipynb
ADDED
|
@@ -0,0 +1,881 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"gpuType": "T4"
|
| 8 |
+
},
|
| 9 |
+
"kernelspec": {
|
| 10 |
+
"name": "python3",
|
| 11 |
+
"display_name": "Python 3"
|
| 12 |
+
},
|
| 13 |
+
"language_info": {
|
| 14 |
+
"name": "python"
|
| 15 |
+
},
|
| 16 |
+
"accelerator": "GPU"
|
| 17 |
+
},
|
| 18 |
+
"cells": [
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "markdown",
|
| 21 |
+
"source": [
|
| 22 |
+
"### Installing and importing dependencies"
|
| 23 |
+
],
|
| 24 |
+
"metadata": {
|
| 25 |
+
"id": "aqymJFVQhere"
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": null,
|
| 31 |
+
"metadata": {
|
| 32 |
+
"colab": {
|
| 33 |
+
"base_uri": "https://localhost:8080/"
|
| 34 |
+
},
|
| 35 |
+
"id": "mJ5zzajTbzRX",
|
| 36 |
+
"outputId": "a6e8f1cc-5ef7-43e2-824e-39133c8f3f98"
|
| 37 |
+
},
|
| 38 |
+
"outputs": [
|
| 39 |
+
{
|
| 40 |
+
"output_type": "stream",
|
| 41 |
+
"name": "stdout",
|
| 42 |
+
"text": [
|
| 43 |
+
"Collecting wget\n",
|
| 44 |
+
" Downloading wget-3.2.zip (10 kB)\n",
|
| 45 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 46 |
+
"Building wheels for collected packages: wget\n",
|
| 47 |
+
" Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 48 |
+
" Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=fb233af0965c5da90b8babdcb0fbd51095c2a135ec877618013ed9078dced85b\n",
|
| 49 |
+
" Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n",
|
| 50 |
+
"Successfully built wget\n",
|
| 51 |
+
"Installing collected packages: wget\n",
|
| 52 |
+
"Successfully installed wget-3.2\n",
|
| 53 |
+
"Reading package lists... Done\n",
|
| 54 |
+
"Building dependency tree... Done\n",
|
| 55 |
+
"Reading state information... Done\n",
|
| 56 |
+
"libsndfile1 is already the newest version (1.0.31-2ubuntu0.1).\n",
|
| 57 |
+
"ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n",
|
| 58 |
+
"The following additional packages will be installed:\n",
|
| 59 |
+
" libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1\n",
|
| 60 |
+
"Suggested packages:\n",
|
| 61 |
+
" libsox-fmt-all\n",
|
| 62 |
+
"The following NEW packages will be installed:\n",
|
| 63 |
+
" libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3 libwavpack1 sox\n",
|
| 64 |
+
"0 upgraded, 7 newly installed, 0 to remove and 45 not upgraded.\n",
|
| 65 |
+
"Need to get 617 kB of archives.\n",
|
| 66 |
+
"After this operation, 1,764 kB of additional disk space will be used.\n",
|
| 67 |
+
"Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n",
|
| 68 |
+
"Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n",
|
| 69 |
+
"Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [240 kB]\n",
|
| 70 |
+
"Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [11.2 kB]\n",
|
| 71 |
+
"Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n",
|
| 72 |
+
"Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [33.7 kB]\n",
|
| 73 |
+
"Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1 [104 kB]\n",
|
| 74 |
+
"Fetched 617 kB in 0s (2,171 kB/s)\n",
|
| 75 |
+
"Selecting previously unselected package libopencore-amrnb0:amd64.\n",
|
| 76 |
+
"(Reading database ... 121918 files and directories currently installed.)\n",
|
| 77 |
+
"Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n",
|
| 78 |
+
"Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
|
| 79 |
+
"Selecting previously unselected package libopencore-amrwb0:amd64.\n",
|
| 80 |
+
"Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n",
|
| 81 |
+
"Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
|
| 82 |
+
"Selecting previously unselected package libsox3:amd64.\n",
|
| 83 |
+
"Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 84 |
+
"Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 85 |
+
"Selecting previously unselected package libsox-fmt-alsa:amd64.\n",
|
| 86 |
+
"Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 87 |
+
"Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 88 |
+
"Selecting previously unselected package libwavpack1:amd64.\n",
|
| 89 |
+
"Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n",
|
| 90 |
+
"Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n",
|
| 91 |
+
"Selecting previously unselected package libsox-fmt-base:amd64.\n",
|
| 92 |
+
"Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 93 |
+
"Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 94 |
+
"Selecting previously unselected package sox.\n",
|
| 95 |
+
"Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1_amd64.deb ...\n",
|
| 96 |
+
"Unpacking sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 97 |
+
"Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 98 |
+
"Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
|
| 99 |
+
"Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 100 |
+
"Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n",
|
| 101 |
+
"Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
|
| 102 |
+
"Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 103 |
+
"Setting up sox (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1) ...\n",
|
| 104 |
+
"Processing triggers for man-db (2.10.2-1) ...\n",
|
| 105 |
+
"Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n",
|
| 106 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
|
| 111 |
+
"\n",
|
| 112 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
|
| 113 |
+
"\n",
|
| 114 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"\u001b[33mDEPRECATION: git+https://github.com/NVIDIA/NeMo.git#egg=nemo_toolkit[all] contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617\u001b[0m\u001b[33m\n",
|
| 119 |
+
"\u001b[0mCollecting nemo_toolkit[all]\n",
|
| 120 |
+
" Cloning https://github.com/NVIDIA/NeMo.git to /tmp/pip-install-rgi4yev1/nemo-toolkit_1a843c4761a042a6a6b3b0dfbb81352c\n",
|
| 121 |
+
" Running command git clone --filter=blob:none --quiet https://github.com/NVIDIA/NeMo.git /tmp/pip-install-rgi4yev1/nemo-toolkit_1a843c4761a042a6a6b3b0dfbb81352c\n",
|
| 122 |
+
" Resolved https://github.com/NVIDIA/NeMo.git to commit 1fa961ba03ab5f8c91b278640e29807079373372\n",
|
| 123 |
+
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
| 124 |
+
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
|
| 125 |
+
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 126 |
+
"Collecting fiddle (from nemo_toolkit[all])\n",
|
| 127 |
+
" Downloading fiddle-0.3.0-py3-none-any.whl (419 kB)\n",
|
| 128 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m419.8/419.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 129 |
+
"\u001b[?25hRequirement already satisfied: huggingface-hub>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.23.1)\n",
|
| 130 |
+
"Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.58.1)\n",
|
| 131 |
+
"Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.25.2)\n",
|
| 132 |
+
"Collecting onnx>=1.7.0 (from nemo_toolkit[all])\n",
|
| 133 |
+
" Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
|
| 134 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 135 |
+
"\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.8.2)\n",
|
| 136 |
+
"Collecting ruamel.yaml (from nemo_toolkit[all])\n",
|
| 137 |
+
" Downloading ruamel.yaml-0.18.6-py3-none-any.whl (117 kB)\n",
|
| 138 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.8/117.8 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 139 |
+
"\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.2.2)\n",
|
| 140 |
+
"Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (67.7.2)\n",
|
| 141 |
+
"Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.15.2)\n",
|
| 142 |
+
"Requirement already satisfied: text-unidecode in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.3)\n",
|
| 143 |
+
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.3.0+cu121)\n",
|
| 144 |
+
"Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (4.66.4)\n",
|
| 145 |
+
"Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.2)\n",
|
| 146 |
+
"Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.14.1)\n",
|
| 147 |
+
"Collecting black~=24.3 (from nemo_toolkit[all])\n",
|
| 148 |
+
" Downloading black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n",
|
| 149 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m56.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 150 |
+
"\u001b[?25hCollecting click==8.0.2 (from nemo_toolkit[all])\n",
|
| 151 |
+
" Downloading click-8.0.2-py3-none-any.whl (97 kB)\n",
|
| 152 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 153 |
+
"\u001b[?25hCollecting isort<6.0.0,>5.1.0 (from nemo_toolkit[all])\n",
|
| 154 |
+
" Downloading isort-5.13.2-py3-none-any.whl (92 kB)\n",
|
| 155 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.3/92.3 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 156 |
+
"\u001b[?25hCollecting parameterized (from nemo_toolkit[all])\n",
|
| 157 |
+
" Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)\n",
|
| 158 |
+
"Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.4.4)\n",
|
| 159 |
+
"Collecting pytest-mock (from nemo_toolkit[all])\n",
|
| 160 |
+
" Downloading pytest_mock-3.14.0-py3-none-any.whl (9.9 kB)\n",
|
| 161 |
+
"Collecting pytest-runner (from nemo_toolkit[all])\n",
|
| 162 |
+
" Downloading pytest_runner-6.0.1-py3-none-any.whl (7.2 kB)\n",
|
| 163 |
+
"Requirement already satisfied: sphinx in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.0.2)\n",
|
| 164 |
+
"Collecting sphinxcontrib-bibtex (from nemo_toolkit[all])\n",
|
| 165 |
+
" Downloading sphinxcontrib_bibtex-2.6.2-py3-none-any.whl (40 kB)\n",
|
| 166 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 167 |
+
"\u001b[?25hCollecting wandb (from nemo_toolkit[all])\n",
|
| 168 |
+
" Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)\n",
|
| 169 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 170 |
+
"\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.2.1)\n",
|
| 171 |
+
"Collecting hydra-core<=1.3.2,>1.3 (from nemo_toolkit[all])\n",
|
| 172 |
+
" Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n",
|
| 173 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 174 |
+
"\u001b[?25hCollecting omegaconf<=2.3 (from nemo_toolkit[all])\n",
|
| 175 |
+
" Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
|
| 176 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 177 |
+
"\u001b[?25hCollecting pytorch-lightning>=2.2.1 (from nemo_toolkit[all])\n",
|
| 178 |
+
" Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)\n",
|
| 179 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m802.3/802.3 kB\u001b[0m \u001b[31m47.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 180 |
+
"\u001b[?25hCollecting torchmetrics>=0.11.0 (from nemo_toolkit[all])\n",
|
| 181 |
+
" Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)\n",
|
| 182 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m868.8/868.8 kB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 183 |
+
"\u001b[?25hCollecting transformers<=4.40.2,>=4.36.0 (from nemo_toolkit[all])\n",
|
| 184 |
+
" Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)\n",
|
| 185 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m70.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 186 |
+
"\u001b[?25hCollecting webdataset>=0.2.86 (from nemo_toolkit[all])\n",
|
| 187 |
+
" Downloading webdataset-0.2.86-py3-none-any.whl (70 kB)\n",
|
| 188 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.4/70.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 189 |
+
"\u001b[?25hCollecting datasets (from nemo_toolkit[all])\n",
|
| 190 |
+
" Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n",
|
| 191 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 192 |
+
"\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.0.0)\n",
|
| 193 |
+
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.0.3)\n",
|
| 194 |
+
"Collecting sacremoses>=0.0.43 (from nemo_toolkit[all])\n",
|
| 195 |
+
" Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)\n",
|
| 196 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.5/897.5 kB\u001b[0m \u001b[31m56.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 197 |
+
"\u001b[?25hRequirement already satisfied: sentencepiece<1.0.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.99)\n",
|
| 198 |
+
"Collecting braceexpand (from nemo_toolkit[all])\n",
|
| 199 |
+
" Downloading braceexpand-0.1.7-py2.py3-none-any.whl (5.9 kB)\n",
|
| 200 |
+
"Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.6.2)\n",
|
| 201 |
+
"Collecting einops (from nemo_toolkit[all])\n",
|
| 202 |
+
" Downloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
|
| 203 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 204 |
+
"\u001b[?25hCollecting g2p-en (from nemo_toolkit[all])\n",
|
| 205 |
+
" Downloading g2p_en-2.1.0-py3-none-any.whl (3.1 MB)\n",
|
| 206 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m76.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 207 |
+
"\u001b[?25hRequirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (7.7.1)\n",
|
| 208 |
+
"Collecting jiwer (from nemo_toolkit[all])\n",
|
| 209 |
+
" Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)\n",
|
| 210 |
+
"Collecting kaldi-python-io (from nemo_toolkit[all])\n",
|
| 211 |
+
" Downloading kaldi-python-io-1.2.2.tar.gz (8.8 kB)\n",
|
| 212 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 213 |
+
"Collecting kaldiio (from nemo_toolkit[all])\n",
|
| 214 |
+
" Downloading kaldiio-2.18.0-py3-none-any.whl (28 kB)\n",
|
| 215 |
+
"Collecting lhotse>=1.22.0 (from nemo_toolkit[all])\n",
|
| 216 |
+
" Downloading lhotse-1.23.0-py3-none-any.whl (772 kB)\n",
|
| 217 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m772.4/772.4 kB\u001b[0m \u001b[31m50.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 218 |
+
"\u001b[?25hRequirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.10.2.post1)\n",
|
| 219 |
+
"Collecting marshmallow (from nemo_toolkit[all])\n",
|
| 220 |
+
" Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n",
|
| 221 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 222 |
+
"\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.7.1)\n",
|
| 223 |
+
"Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (24.0)\n",
|
| 224 |
+
"Collecting pyannote.core (from nemo_toolkit[all])\n",
|
| 225 |
+
" Downloading pyannote.core-5.0.0-py3-none-any.whl (58 kB)\n",
|
| 226 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 227 |
+
"\u001b[?25hCollecting pyannote.metrics (from nemo_toolkit[all])\n",
|
| 228 |
+
" Downloading pyannote.metrics-3.2.1-py3-none-any.whl (51 kB)\n",
|
| 229 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.4/51.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 230 |
+
"\u001b[?25hCollecting pydub (from nemo_toolkit[all])\n",
|
| 231 |
+
" Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
|
| 232 |
+
"Collecting pyloudnorm (from nemo_toolkit[all])\n",
|
| 233 |
+
" Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n",
|
| 234 |
+
"Collecting resampy (from nemo_toolkit[all])\n",
|
| 235 |
+
" Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)\n",
|
| 236 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m82.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 237 |
+
"\u001b[?25hRequirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (1.11.4)\n",
|
| 238 |
+
"Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.12.1)\n",
|
| 239 |
+
"Collecting sox (from nemo_toolkit[all])\n",
|
| 240 |
+
" Downloading sox-1.5.0.tar.gz (63 kB)\n",
|
| 241 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 242 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 243 |
+
"Collecting texterrors (from nemo_toolkit[all])\n",
|
| 244 |
+
" Downloading texterrors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
|
| 245 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m58.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 246 |
+
"\u001b[?25hCollecting accelerated-scan (from nemo_toolkit[all])\n",
|
| 247 |
+
" Downloading accelerated_scan-0.2.0-py3-none-any.whl (11 kB)\n",
|
| 248 |
+
"Collecting boto3 (from nemo_toolkit[all])\n",
|
| 249 |
+
" Downloading boto3-1.34.113-py3-none-any.whl (139 kB)\n",
|
| 250 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 251 |
+
"\u001b[?25hCollecting causal-conv1d>=1.2.0 (from nemo_toolkit[all])\n",
|
| 252 |
+
" Downloading causal_conv1d-1.2.2.post1.tar.gz (7.2 kB)\n",
|
| 253 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 254 |
+
"Collecting faiss-cpu (from nemo_toolkit[all])\n",
|
| 255 |
+
" Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
|
| 256 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m41.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 257 |
+
"\u001b[?25hCollecting fasttext (from nemo_toolkit[all])\n",
|
| 258 |
+
" Downloading fasttext-0.9.2.tar.gz (68 kB)\n",
|
| 259 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m68.8/68.8 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 260 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 261 |
+
"Collecting flask-restful (from nemo_toolkit[all])\n",
|
| 262 |
+
" Downloading Flask_RESTful-0.3.10-py2.py3-none-any.whl (26 kB)\n",
|
| 263 |
+
"Collecting ftfy (from nemo_toolkit[all])\n",
|
| 264 |
+
" Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)\n",
|
| 265 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.4/54.4 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 266 |
+
"\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (5.1.0)\n",
|
| 267 |
+
"Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.9.0)\n",
|
| 268 |
+
"Collecting ijson (from nemo_toolkit[all])\n",
|
| 269 |
+
" Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n",
|
| 270 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 271 |
+
"\u001b[?25hRequirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.42.1)\n",
|
| 272 |
+
"Collecting markdown2 (from nemo_toolkit[all])\n",
|
| 273 |
+
" Downloading markdown2-2.4.13-py2.py3-none-any.whl (41 kB)\n",
|
| 274 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 275 |
+
"\u001b[?25hRequirement already satisfied: nltk>=3.6.5 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (3.8.1)\n",
|
| 276 |
+
"Collecting opencc<1.1.7 (from nemo_toolkit[all])\n",
|
| 277 |
+
" Downloading OpenCC-1.1.6-cp310-cp310-manylinux1_x86_64.whl (778 kB)\n",
|
| 278 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m778.3/778.3 kB\u001b[0m \u001b[31m43.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 279 |
+
"\u001b[?25hCollecting pangu (from nemo_toolkit[all])\n",
|
| 280 |
+
" Downloading pangu-4.0.6.1-py3-none-any.whl (6.4 kB)\n",
|
| 281 |
+
"Collecting rapidfuzz (from nemo_toolkit[all])\n",
|
| 282 |
+
" Downloading rapidfuzz-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
|
| 283 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m76.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 284 |
+
"\u001b[?25hCollecting rouge-score (from nemo_toolkit[all])\n",
|
| 285 |
+
" Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
|
| 286 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 287 |
+
"Collecting sacrebleu (from nemo_toolkit[all])\n",
|
| 288 |
+
" Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)\n",
|
| 289 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.7/106.7 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 290 |
+
"\u001b[?25hCollecting sentence-transformers (from nemo_toolkit[all])\n",
|
| 291 |
+
" Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n",
|
| 292 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 293 |
+
"\u001b[?25hRequirement already satisfied: tensorstore<0.1.46 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.1.45)\n",
|
| 294 |
+
"Collecting zarr (from nemo_toolkit[all])\n",
|
| 295 |
+
" Downloading zarr-2.18.2-py3-none-any.whl (210 kB)\n",
|
| 296 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.2/210.2 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 297 |
+
"\u001b[?25hCollecting attrdict (from nemo_toolkit[all])\n",
|
| 298 |
+
" Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
|
| 299 |
+
"Collecting kornia (from nemo_toolkit[all])\n",
|
| 300 |
+
" Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)\n",
|
| 301 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m825.4/825.4 kB\u001b[0m \u001b[31m54.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 302 |
+
"\u001b[?25hCollecting pypinyin (from nemo_toolkit[all])\n",
|
| 303 |
+
" Downloading pypinyin-0.51.0-py2.py3-none-any.whl (1.4 MB)\n",
|
| 304 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m62.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 305 |
+
"\u001b[?25hCollecting pypinyin-dict (from nemo_toolkit[all])\n",
|
| 306 |
+
" Downloading pypinyin_dict-0.8.0-py2.py3-none-any.whl (9.5 MB)\n",
|
| 307 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 308 |
+
"\u001b[?25hCollecting progress>=1.5 (from nemo_toolkit[all])\n",
|
| 309 |
+
" Downloading progress-1.6.tar.gz (7.8 kB)\n",
|
| 310 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 311 |
+
"Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (0.9.0)\n",
|
| 312 |
+
"Collecting textdistance>=4.1.5 (from nemo_toolkit[all])\n",
|
| 313 |
+
" Downloading textdistance-4.6.2-py3-none-any.whl (31 kB)\n",
|
| 314 |
+
"Collecting addict (from nemo_toolkit[all])\n",
|
| 315 |
+
" Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
|
| 316 |
+
"Collecting clip (from nemo_toolkit[all])\n",
|
| 317 |
+
" Downloading clip-0.2.0.tar.gz (5.5 kB)\n",
|
| 318 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 319 |
+
"Collecting decord (from nemo_toolkit[all])\n",
|
| 320 |
+
" Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
|
| 321 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m74.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 322 |
+
"\u001b[?25hCollecting diffusers>=0.19.3 (from nemo_toolkit[all])\n",
|
| 323 |
+
" Downloading diffusers-0.28.0-py3-none-any.whl (2.2 MB)\n",
|
| 324 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 325 |
+
"\u001b[?25hCollecting einops-exts (from nemo_toolkit[all])\n",
|
| 326 |
+
" Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
|
| 327 |
+
"Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from nemo_toolkit[all]) (2.31.6)\n",
|
| 328 |
+
"Collecting nerfacc>=0.5.3 (from nemo_toolkit[all])\n",
|
| 329 |
+
" Downloading nerfacc-0.5.3-py3-none-any.whl (54 kB)\n",
|
| 330 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 331 |
+
"\u001b[?25hCollecting open-clip-torch (from nemo_toolkit[all])\n",
|
| 332 |
+
" Downloading open_clip_torch-2.24.0-py3-none-any.whl (1.5 MB)\n",
|
| 333 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m58.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 334 |
+
"\u001b[?25hCollecting PyMCubes (from nemo_toolkit[all])\n",
|
| 335 |
+
" Downloading PyMCubes-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB)\n",
|
| 336 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.3/274.3 kB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 337 |
+
"\u001b[?25hCollecting taming-transformers (from nemo_toolkit[all])\n",
|
| 338 |
+
" Downloading taming_transformers-0.0.1-py3-none-any.whl (45 kB)\n",
|
| 339 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.6/45.6 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 340 |
+
"\u001b[?25hCollecting torchdiffeq (from nemo_toolkit[all])\n",
|
| 341 |
+
" Downloading torchdiffeq-0.2.3-py3-none-any.whl (31 kB)\n",
|
| 342 |
+
"Collecting torchsde (from nemo_toolkit[all])\n",
|
| 343 |
+
" Downloading torchsde-0.2.6-py3-none-any.whl (61 kB)\n",
|
| 344 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 345 |
+
"\u001b[?25hCollecting trimesh (from nemo_toolkit[all])\n",
|
| 346 |
+
" Downloading trimesh-4.4.0-py3-none-any.whl (694 kB)\n",
|
| 347 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m694.6/694.6 kB\u001b[0m \u001b[31m39.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 348 |
+
"\u001b[?25hCollecting nemo-text-processing (from nemo_toolkit[all])\n",
|
| 349 |
+
" Downloading nemo_text_processing-1.0.2-py3-none-any.whl (2.6 MB)\n",
|
| 350 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m44.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 351 |
+
"\u001b[?25hCollecting mypy-extensions>=0.4.3 (from black~=24.3->nemo_toolkit[all])\n",
|
| 352 |
+
" Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
|
| 353 |
+
"Collecting pathspec>=0.9.0 (from black~=24.3->nemo_toolkit[all])\n",
|
| 354 |
+
" Downloading pathspec-0.12.1-py3-none-any.whl (31 kB)\n",
|
| 355 |
+
"Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.2.2)\n",
|
| 356 |
+
"Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (2.0.1)\n",
|
| 357 |
+
"Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from black~=24.3->nemo_toolkit[all]) (4.11.0)\n",
|
| 358 |
+
"Collecting ninja (from causal-conv1d>=1.2.0->nemo_toolkit[all])\n",
|
| 359 |
+
" Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
|
| 360 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m25.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 361 |
+
"\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (7.1.0)\n",
|
| 362 |
+
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (3.14.0)\n",
|
| 363 |
+
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2023.12.25)\n",
|
| 364 |
+
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (2.31.0)\n",
|
| 365 |
+
"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (0.4.3)\n",
|
| 366 |
+
"Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.19.3->nemo_toolkit[all]) (9.4.0)\n",
|
| 367 |
+
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (2023.6.0)\n",
|
| 368 |
+
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.20.3->nemo_toolkit[all]) (6.0.1)\n",
|
| 369 |
+
"Collecting antlr4-python3-runtime==4.9.* (from hydra-core<=1.3.2,>1.3->nemo_toolkit[all])\n",
|
| 370 |
+
" Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
|
| 371 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 372 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 373 |
+
"INFO: pip is looking at multiple versions of jiwer to determine which version is compatible with other requirements. This could take a while.\n",
|
| 374 |
+
"Collecting jiwer (from nemo_toolkit[all])\n",
|
| 375 |
+
" Downloading jiwer-3.0.3-py3-none-any.whl (21 kB)\n",
|
| 376 |
+
" Downloading jiwer-3.0.2-py3-none-any.whl (21 kB)\n",
|
| 377 |
+
" Downloading jiwer-3.0.1-py3-none-any.whl (21 kB)\n",
|
| 378 |
+
" Downloading jiwer-3.0.0-py3-none-any.whl (21 kB)\n",
|
| 379 |
+
" Downloading jiwer-2.6.0-py3-none-any.whl (20 kB)\n",
|
| 380 |
+
" Downloading jiwer-2.5.2-py3-none-any.whl (15 kB)\n",
|
| 381 |
+
"Collecting rapidfuzz (from nemo_toolkit[all])\n",
|
| 382 |
+
" Downloading rapidfuzz-2.13.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
|
| 383 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m63.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 384 |
+
"\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from lhotse>=1.22.0->nemo_toolkit[all]) (3.0.1)\n",
|
| 385 |
+
"Collecting cytoolz>=0.10.1 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
|
| 386 |
+
" Downloading cytoolz-0.12.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
|
| 387 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m63.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 388 |
+
"\u001b[?25hCollecting intervaltree>=3.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
|
| 389 |
+
" Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
|
| 390 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 391 |
+
"Collecting lilcom>=1.1.0 (from lhotse>=1.22.0->nemo_toolkit[all])\n",
|
| 392 |
+
" Downloading lilcom-1.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB)\n",
|
| 393 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.1/87.1 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 394 |
+
"\u001b[?25hRequirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.4.2)\n",
|
| 395 |
+
"Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (4.4.2)\n",
|
| 396 |
+
"Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.8.1)\n",
|
| 397 |
+
"Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.3.7)\n",
|
| 398 |
+
"Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (0.4)\n",
|
| 399 |
+
"Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->nemo_toolkit[all]) (1.0.8)\n",
|
| 400 |
+
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.2.1)\n",
|
| 401 |
+
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (0.12.1)\n",
|
| 402 |
+
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (4.51.0)\n",
|
| 403 |
+
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (1.4.5)\n",
|
| 404 |
+
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->nemo_toolkit[all]) (3.1.2)\n",
|
| 405 |
+
"Requirement already satisfied: rich>=12 in /usr/local/lib/python3.10/dist-packages (from nerfacc>=0.5.3->nemo_toolkit[all]) (13.7.1)\n",
|
| 406 |
+
"Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->nemo_toolkit[all]) (0.41.1)\n",
|
| 407 |
+
"Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx>=1.7.0->nemo_toolkit[all]) (3.20.3)\n",
|
| 408 |
+
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->nemo_toolkit[all]) (1.16.0)\n",
|
| 409 |
+
"Collecting lightning-utilities>=0.8.0 (from pytorch-lightning>=2.2.1->nemo_toolkit[all])\n",
|
| 410 |
+
" Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)\n",
|
| 411 |
+
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->nemo_toolkit[all]) (3.5.0)\n",
|
| 412 |
+
"Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->nemo_toolkit[all]) (1.16.0)\n",
|
| 413 |
+
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (1.12)\n",
|
| 414 |
+
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.3)\n",
|
| 415 |
+
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (3.1.4)\n",
|
| 416 |
+
"Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 417 |
+
" Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
|
| 418 |
+
"Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 419 |
+
" Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
|
| 420 |
+
"Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 421 |
+
" Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
|
| 422 |
+
"Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->nemo_toolkit[all])\n",
|
| 423 |
+
" Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
|
| 424 |
+
"Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->nemo_toolkit[all])\n",
|
| 425 |
+
" Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
|
| 426 |
+
"Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->nemo_toolkit[all])\n",
|
| 427 |
+
" Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
|
| 428 |
+
"Collecting nvidia-curand-cu12==10.3.2.106 (from torch->nemo_toolkit[all])\n",
|
| 429 |
+
" Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
|
| 430 |
+
"Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->nemo_toolkit[all])\n",
|
| 431 |
+
" Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
|
| 432 |
+
"Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->nemo_toolkit[all])\n",
|
| 433 |
+
" Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
|
| 434 |
+
"Collecting nvidia-nccl-cu12==2.20.5 (from torch->nemo_toolkit[all])\n",
|
| 435 |
+
" Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
|
| 436 |
+
"Collecting nvidia-nvtx-cu12==12.1.105 (from torch->nemo_toolkit[all])\n",
|
| 437 |
+
" Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
|
| 438 |
+
"Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->nemo_toolkit[all]) (2.3.0)\n",
|
| 439 |
+
"Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->nemo_toolkit[all])\n",
|
| 440 |
+
" Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
|
| 441 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m57.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 442 |
+
"\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<=4.40.2,>=4.36.0->nemo_toolkit[all]) (0.19.1)\n",
|
| 443 |
+
"Collecting botocore<1.35.0,>=1.34.113 (from boto3->nemo_toolkit[all])\n",
|
| 444 |
+
" Downloading botocore-1.34.113-py3-none-any.whl (12.3 MB)\n",
|
| 445 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m63.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 446 |
+
"\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->nemo_toolkit[all])\n",
|
| 447 |
+
" Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
|
| 448 |
+
"Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->nemo_toolkit[all])\n",
|
| 449 |
+
" Downloading s3transfer-0.10.1-py3-none-any.whl (82 kB)\n",
|
| 450 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.2/82.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 451 |
+
"\u001b[?25hRequirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (14.0.2)\n",
|
| 452 |
+
"Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (0.6)\n",
|
| 453 |
+
"Collecting dill<0.3.9,>=0.3.0 (from datasets->nemo_toolkit[all])\n",
|
| 454 |
+
" Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
|
| 455 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 456 |
+
"\u001b[?25hCollecting xxhash (from datasets->nemo_toolkit[all])\n",
|
| 457 |
+
" Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
|
| 458 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 459 |
+
"\u001b[?25hCollecting multiprocess (from datasets->nemo_toolkit[all])\n",
|
| 460 |
+
" Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
|
| 461 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 462 |
+
"\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->nemo_toolkit[all]) (3.9.5)\n",
|
| 463 |
+
"Collecting pybind11>=2.2 (from fasttext->nemo_toolkit[all])\n",
|
| 464 |
+
" Using cached pybind11-2.12.0-py3-none-any.whl (234 kB)\n",
|
| 465 |
+
"Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (1.4.0)\n",
|
| 466 |
+
"Requirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from fiddle->nemo_toolkit[all]) (0.20.3)\n",
|
| 467 |
+
"Collecting libcst (from fiddle->nemo_toolkit[all])\n",
|
| 468 |
+
" Downloading libcst-1.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
|
| 469 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 470 |
+
"\u001b[?25hCollecting aniso8601>=0.82 (from flask-restful->nemo_toolkit[all])\n",
|
| 471 |
+
" Downloading aniso8601-9.0.1-py2.py3-none-any.whl (52 kB)\n",
|
| 472 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.8/52.8 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 473 |
+
"\u001b[?25hRequirement already satisfied: Flask>=0.8 in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2.2.5)\n",
|
| 474 |
+
"Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from flask-restful->nemo_toolkit[all]) (2023.4)\n",
|
| 475 |
+
"Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from ftfy->nemo_toolkit[all]) (0.2.13)\n",
|
| 476 |
+
"Collecting distance>=0.1.3 (from g2p-en->nemo_toolkit[all])\n",
|
| 477 |
+
" Downloading Distance-0.1.3.tar.gz (180 kB)\n",
|
| 478 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.3/180.3 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 479 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 480 |
+
"Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->nemo_toolkit[all]) (2.7.1)\n",
|
| 481 |
+
"Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown->nemo_toolkit[all]) (4.12.3)\n",
|
| 482 |
+
"Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.5.6)\n",
|
| 483 |
+
"Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
|
| 484 |
+
"Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (5.7.1)\n",
|
| 485 |
+
"Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.6.6)\n",
|
| 486 |
+
"Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (7.34.0)\n",
|
| 487 |
+
"Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->nemo_toolkit[all]) (3.0.10)\n",
|
| 488 |
+
"Collecting kornia-rs>=0.1.0 (from kornia->nemo_toolkit[all])\n",
|
| 489 |
+
" Downloading kornia_rs-0.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n",
|
| 490 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m75.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 491 |
+
"\u001b[?25hCollecting cdifflib (from nemo-text-processing->nemo_toolkit[all])\n",
|
| 492 |
+
" Downloading cdifflib-1.2.6.tar.gz (11 kB)\n",
|
| 493 |
+
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
| 494 |
+
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
|
| 495 |
+
" Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
| 496 |
+
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 497 |
+
"Collecting pynini==2.1.5 (from nemo-text-processing->nemo_toolkit[all])\n",
|
| 498 |
+
" Downloading pynini-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161.3 MB)\n",
|
| 499 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 MB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 500 |
+
"\u001b[?25hRequirement already satisfied: Cython>=0.29 in /usr/local/lib/python3.10/dist-packages (from pynini==2.1.5->nemo-text-processing->nemo_toolkit[all]) (3.0.10)\n",
|
| 501 |
+
"Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from open-clip-torch->nemo_toolkit[all]) (0.18.0+cu121)\n",
|
| 502 |
+
"Collecting timm (from open-clip-torch->nemo_toolkit[all])\n",
|
| 503 |
+
" Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)\n",
|
| 504 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m58.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 505 |
+
"\u001b[?25hRequirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->nemo_toolkit[all]) (2024.1)\n",
|
| 506 |
+
"Requirement already satisfied: sortedcontainers>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pyannote.core->nemo_toolkit[all]) (2.4.0)\n",
|
| 507 |
+
"Collecting pyannote.database>=4.0.1 (from pyannote.metrics->nemo_toolkit[all])\n",
|
| 508 |
+
" Downloading pyannote.database-5.1.0-py3-none-any.whl (48 kB)\n",
|
| 509 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.1/48.1 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 510 |
+
"\u001b[?25hCollecting docopt>=0.6.2 (from pyannote.metrics->nemo_toolkit[all])\n",
|
| 511 |
+
" Downloading docopt-0.6.2.tar.gz (25 kB)\n",
|
| 512 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 513 |
+
"Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.10/dist-packages (from pyloudnorm->nemo_toolkit[all]) (0.18.3)\n",
|
| 514 |
+
"Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (2.0.0)\n",
|
| 515 |
+
"Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.5.0)\n",
|
| 516 |
+
"Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->nemo_toolkit[all]) (1.2.1)\n",
|
| 517 |
+
"Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml->nemo_toolkit[all])\n",
|
| 518 |
+
" Downloading ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (526 kB)\n",
|
| 519 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 520 |
+
"\u001b[?25hCollecting portalocker (from sacrebleu->nemo_toolkit[all])\n",
|
| 521 |
+
" Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
|
| 522 |
+
"Collecting colorama (from sacrebleu->nemo_toolkit[all])\n",
|
| 523 |
+
" Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
|
| 524 |
+
"Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu->nemo_toolkit[all]) (4.9.4)\n",
|
| 525 |
+
"Requirement already satisfied: sphinxcontrib-applehelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.8)\n",
|
| 526 |
+
"Requirement already satisfied: sphinxcontrib-devhelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.6)\n",
|
| 527 |
+
"Requirement already satisfied: sphinxcontrib-jsmath in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.1)\n",
|
| 528 |
+
"Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.0.5)\n",
|
| 529 |
+
"Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.5 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.1.10)\n",
|
| 530 |
+
"Requirement already satisfied: sphinxcontrib-qthelp in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.0.7)\n",
|
| 531 |
+
"Requirement already satisfied: Pygments>=2.0 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.16.1)\n",
|
| 532 |
+
"Requirement already satisfied: docutils<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.18.1)\n",
|
| 533 |
+
"Requirement already satisfied: snowballstemmer>=1.1 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.2.0)\n",
|
| 534 |
+
"Requirement already satisfied: babel>=1.3 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (2.15.0)\n",
|
| 535 |
+
"Requirement already satisfied: alabaster<0.8,>=0.7 in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (0.7.16)\n",
|
| 536 |
+
"Requirement already satisfied: imagesize in /usr/local/lib/python3.10/dist-packages (from sphinx->nemo_toolkit[all]) (1.4.1)\n",
|
| 537 |
+
"Collecting docutils<0.19,>=0.14 (from sphinx->nemo_toolkit[all])\n",
|
| 538 |
+
" Downloading docutils-0.17.1-py2.py3-none-any.whl (575 kB)\n",
|
| 539 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m575.5/575.5 kB\u001b[0m \u001b[31m34.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 540 |
+
"\u001b[?25hCollecting pybtex>=0.24 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
|
| 541 |
+
" Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)\n",
|
| 542 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m561.4/561.4 kB\u001b[0m \u001b[31m32.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 543 |
+
"\u001b[?25hCollecting pybtex-docutils>=1.0.0 (from sphinxcontrib-bibtex->nemo_toolkit[all])\n",
|
| 544 |
+
" Downloading pybtex_docutils-1.0.3-py3-none-any.whl (6.4 kB)\n",
|
| 545 |
+
"Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.64.0)\n",
|
| 546 |
+
"Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (2.27.0)\n",
|
| 547 |
+
"Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (1.2.0)\n",
|
| 548 |
+
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.6)\n",
|
| 549 |
+
"Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (0.7.2)\n",
|
| 550 |
+
"Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->nemo_toolkit[all]) (3.0.3)\n",
|
| 551 |
+
"Collecting plac (from texterrors->nemo_toolkit[all])\n",
|
| 552 |
+
" Downloading plac-1.4.3-py2.py3-none-any.whl (22 kB)\n",
|
| 553 |
+
"Collecting loguru (from texterrors->nemo_toolkit[all])\n",
|
| 554 |
+
" Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n",
|
| 555 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 556 |
+
"\u001b[?25hRequirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from texterrors->nemo_toolkit[all]) (2.4.0)\n",
|
| 557 |
+
"Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
|
| 558 |
+
" Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
|
| 559 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 560 |
+
"\u001b[?25hCollecting trampoline>=0.1.2 (from torchsde->nemo_toolkit[all])\n",
|
| 561 |
+
" Downloading trampoline-0.1.2-py3-none-any.whl (5.2 kB)\n",
|
| 562 |
+
"Collecting docker-pycreds>=0.4.0 (from wandb->nemo_toolkit[all])\n",
|
| 563 |
+
" Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
|
| 564 |
+
"Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->nemo_toolkit[all])\n",
|
| 565 |
+
" Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n",
|
| 566 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 567 |
+
"\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->nemo_toolkit[all]) (5.9.5)\n",
|
| 568 |
+
"Collecting sentry-sdk>=1.0.0 (from wandb->nemo_toolkit[all])\n",
|
| 569 |
+
" Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)\n",
|
| 570 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m25.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 571 |
+
"\u001b[?25hCollecting setproctitle (from wandb->nemo_toolkit[all])\n",
|
| 572 |
+
" Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
|
| 573 |
+
"Collecting asciitree (from zarr->nemo_toolkit[all])\n",
|
| 574 |
+
" Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n",
|
| 575 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 576 |
+
"Collecting numcodecs>=0.10.0 (from zarr->nemo_toolkit[all])\n",
|
| 577 |
+
" Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n",
|
| 578 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m86.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 579 |
+
"\u001b[?25hCollecting fasteners (from zarr->nemo_toolkit[all])\n",
|
| 580 |
+
" Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n",
|
| 581 |
+
"Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.113->boto3->nemo_toolkit[all]) (2.0.7)\n",
|
| 582 |
+
"Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->nemo_toolkit[all]) (2.22)\n",
|
| 583 |
+
"Requirement already satisfied: toolz>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from cytoolz>=0.10.1->lhotse>=1.22.0->nemo_toolkit[all]) (0.12.1)\n",
|
| 584 |
+
"Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask>=0.8->flask-restful->nemo_toolkit[all]) (2.2.0)\n",
|
| 585 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.3.1)\n",
|
| 586 |
+
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (23.2.0)\n",
|
| 587 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.4.1)\n",
|
| 588 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (6.0.5)\n",
|
| 589 |
+
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (1.9.4)\n",
|
| 590 |
+
"Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->nemo_toolkit[all]) (4.0.3)\n",
|
| 591 |
+
"Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
|
| 592 |
+
" Downloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
|
| 593 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 594 |
+
"\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (5.3.3)\n",
|
| 595 |
+
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.4.0)\n",
|
| 596 |
+
"Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (4.9)\n",
|
| 597 |
+
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (1.3.1)\n",
|
| 598 |
+
"Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.1.12)\n",
|
| 599 |
+
"Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets->nemo_toolkit[all]) (6.3.3)\n",
|
| 600 |
+
"Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all])\n",
|
| 601 |
+
" Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n",
|
| 602 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m64.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 603 |
+
"\u001b[?25hRequirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.5)\n",
|
| 604 |
+
"Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (3.0.43)\n",
|
| 605 |
+
"Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.2.0)\n",
|
| 606 |
+
"Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.1.7)\n",
|
| 607 |
+
"Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (4.9.0)\n",
|
| 608 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->nemo_toolkit[all]) (2.1.5)\n",
|
| 609 |
+
"Collecting typer>=0.12.1 (from pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
|
| 610 |
+
" Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n",
|
| 611 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 612 |
+
"\u001b[?25hCollecting latexcodec>=1.0.4 (from pybtex>=0.24->sphinxcontrib-bibtex->nemo_toolkit[all])\n",
|
| 613 |
+
" Downloading latexcodec-3.0.0-py3-none-any.whl (18 kB)\n",
|
| 614 |
+
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (0.7.0)\n",
|
| 615 |
+
"Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->nemo_toolkit[all]) (2.18.2)\n",
|
| 616 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.3.2)\n",
|
| 617 |
+
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (3.7)\n",
|
| 618 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (2024.2.2)\n",
|
| 619 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (3.0.0)\n",
|
| 620 |
+
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->nemo_toolkit[all]) (1.3.0)\n",
|
| 621 |
+
"Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.5)\n",
|
| 622 |
+
"Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown->nemo_toolkit[all]) (2.5)\n",
|
| 623 |
+
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata->diffusers>=0.19.3->nemo_toolkit[all]) (3.18.2)\n",
|
| 624 |
+
"INFO: pip is looking at multiple versions of levenshtein to determine which version is compatible with other requirements. This could take a while.\n",
|
| 625 |
+
"Collecting Levenshtein (from texterrors->nemo_toolkit[all])\n",
|
| 626 |
+
" Downloading Levenshtein-0.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
|
| 627 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 628 |
+
"\u001b[?25h Downloading Levenshtein-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)\n",
|
| 629 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 630 |
+
"\u001b[?25h Downloading Levenshtein-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB)\n",
|
| 631 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.4/169.4 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 632 |
+
"\u001b[?25h Downloading Levenshtein-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)\n",
|
| 633 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.9/172.9 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
| 634 |
+
"\u001b[?25hRequirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests->diffusers>=0.19.3->nemo_toolkit[all]) (1.7.1)\n",
|
| 635 |
+
"Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->nemo_toolkit[all])\n",
|
| 636 |
+
" Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
|
| 637 |
+
"Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
|
| 638 |
+
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=12->nerfacc>=0.5.3->nemo_toolkit[all]) (0.1.2)\n",
|
| 639 |
+
"Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (24.0.1)\n",
|
| 640 |
+
"Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (23.1.0)\n",
|
| 641 |
+
"Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.7.2)\n",
|
| 642 |
+
"Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (5.10.4)\n",
|
| 643 |
+
"Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.5.4)\n",
|
| 644 |
+
"Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.6.0)\n",
|
| 645 |
+
"Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.3)\n",
|
| 646 |
+
"Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
|
| 647 |
+
"Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.20.0)\n",
|
| 648 |
+
"Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.0.0)\n",
|
| 649 |
+
"Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->nemo_toolkit[all]) (0.7.0)\n",
|
| 650 |
+
"Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->nemo_toolkit[all]) (0.6.0)\n",
|
| 651 |
+
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->nemo_toolkit[all]) (3.2.2)\n",
|
| 652 |
+
"Collecting shellingham>=1.3.0 (from typer>=0.12.1->pyannote.database>=4.0.1->pyannote.metrics->nemo_toolkit[all])\n",
|
| 653 |
+
" Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
|
| 654 |
+
"Requirement already satisfied: jupyter-server>=1.8 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.24.0)\n",
|
| 655 |
+
"Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.2.4)\n",
|
| 656 |
+
"Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (6.1.0)\n",
|
| 657 |
+
"Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.7.1)\n",
|
| 658 |
+
"Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.4)\n",
|
| 659 |
+
"Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.3.0)\n",
|
| 660 |
+
"Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.8.4)\n",
|
| 661 |
+
"Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.10.0)\n",
|
| 662 |
+
"Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.5.1)\n",
|
| 663 |
+
"Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.0)\n",
|
| 664 |
+
"Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2.19.1)\n",
|
| 665 |
+
"Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (4.19.2)\n",
|
| 666 |
+
"Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (21.2.0)\n",
|
| 667 |
+
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (2023.12.1)\n",
|
| 668 |
+
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.35.1)\n",
|
| 669 |
+
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.18.1)\n",
|
| 670 |
+
"Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (3.7.1)\n",
|
| 671 |
+
"Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.8.0)\n",
|
| 672 |
+
"Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (0.5.1)\n",
|
| 673 |
+
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server>=1.8->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->nemo_toolkit[all]) (1.3.1)\n",
|
| 674 |
+
"Building wheels for collected packages: causal-conv1d, antlr4-python3-runtime, progress, clip, fasttext, kaldi-python-io, nemo_toolkit, rouge-score, sox, distance, docopt, intervaltree, asciitree, cdifflib\n",
|
| 675 |
+
" Building wheel for causal-conv1d (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 676 |
+
" Created wheel for causal-conv1d: filename=causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl size=103643300 sha256=2bba8823ae89bd79c2d067978e0e533fab8298f69855bfc5d199828b278cf66c\n",
|
| 677 |
+
" Stored in directory: /root/.cache/pip/wheels/22/a7/db/0c9482dec3707ad23181b0eb2da40e4b8f26aaed49752fc49f\n",
|
| 678 |
+
" Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 679 |
+
" Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=5ba620ca9da88d714c879b4a21820b9bdebd36fb76051b0b48a375e2e4f0fcb7\n",
|
| 680 |
+
" Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
|
| 681 |
+
" Building wheel for progress (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 682 |
+
" Created wheel for progress: filename=progress-1.6-py3-none-any.whl size=9614 sha256=8102705b8ef612530f059a82dde5ea899c85e387fb8c5e956ed0fef5f2929103\n",
|
| 683 |
+
" Stored in directory: /root/.cache/pip/wheels/a2/68/5f/c339b20a41659d856c93ccdce6a33095493eb82c3964aac5a1\n",
|
| 684 |
+
" Building wheel for clip (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 685 |
+
" Created wheel for clip: filename=clip-0.2.0-py3-none-any.whl size=6989 sha256=d8ab11e4cbc0837cde86e4c7011ffabab187b9937f98e39480bb87ec75a34740\n",
|
| 686 |
+
" Stored in directory: /root/.cache/pip/wheels/7f/5c/e6/2c0fdb453a3569188864b17e9676bea8b3b7e160c037117869\n",
|
| 687 |
+
" Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 688 |
+
" Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4227136 sha256=71dc3f2989afb1a6f206ee64ae86bfcfa8381c66960e93ac984be24f2871c66b\n",
|
| 689 |
+
" Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394\n",
|
| 690 |
+
" Building wheel for kaldi-python-io (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 691 |
+
" Created wheel for kaldi-python-io: filename=kaldi_python_io-1.2.2-py3-none-any.whl size=8949 sha256=959e8f93e517267e62e51f1e26455214c6b2aba320bb5621fa506730d4ad2ceb\n",
|
| 692 |
+
" Stored in directory: /root/.cache/pip/wheels/b7/23/5f/49d3a826be576faf61d84e8028e1914bb36a5586ee2613b087\n",
|
| 693 |
+
" Building wheel for nemo_toolkit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 694 |
+
" Created wheel for nemo_toolkit: filename=nemo_toolkit-2.0.0rc1-py3-none-any.whl size=3709778 sha256=458c9cb158a12a8ddc8c570fde72f15111afe5767ac2a0e485966d2d76e1bda8\n",
|
| 695 |
+
" Stored in directory: /tmp/pip-ephem-wheel-cache-992hxcpb/wheels/c3/4e/45/ab3d29aa73df619f27b371cacf809d5330a18f794879163c1b\n",
|
| 696 |
+
" Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 697 |
+
" Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=076fda87c1a21e7a9fe88f0b3b9a26f7b76171063d76812353b4a30ebe02da51\n",
|
| 698 |
+
" Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
|
| 699 |
+
" Building wheel for sox (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 700 |
+
" Created wheel for sox: filename=sox-1.5.0-py3-none-any.whl size=40038 sha256=717f5186772b8ce84cbbf7b1a01931be688fc591982575b518b2bc327460675d\n",
|
| 701 |
+
" Stored in directory: /root/.cache/pip/wheels/74/e7/7b/8033be3ec5e4994595d01269fc9657c8fd83a0dcbf8536666a\n",
|
| 702 |
+
" Building wheel for distance (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 703 |
+
" Created wheel for distance: filename=Distance-0.1.3-py3-none-any.whl size=16258 sha256=4863022ee11d6ede70f4b4362c6554629a2e734cb7a0a0212904aeafac36f78e\n",
|
| 704 |
+
" Stored in directory: /root/.cache/pip/wheels/e8/bb/de/f71bf63559ea9a921059a5405806f7ff6ed612a9231c4a9309\n",
|
| 705 |
+
" Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 706 |
+
" Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=80b5355530de1ea759d79fc19047cdd59679e6b0a014b51dbca811111b1aad36\n",
|
| 707 |
+
" Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n",
|
| 708 |
+
" Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 709 |
+
" Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26096 sha256=e831b80cc0232f925c293997e7be035697c58f5a834060f1b1f6a097fa5502b7\n",
|
| 710 |
+
" Stored in directory: /root/.cache/pip/wheels/fa/80/8c/43488a924a046b733b64de3fac99252674c892a4c3801c0a61\n",
|
| 711 |
+
" Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
| 712 |
+
" Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=483b21d8a257179dcd00b430bc43c6fce9f97d0442b44433ac2794d3030a48e1\n",
|
| 713 |
+
" Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n",
|
| 714 |
+
" Building wheel for cdifflib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
| 715 |
+
" Created wheel for cdifflib: filename=cdifflib-1.2.6-cp310-cp310-linux_x86_64.whl size=27681 sha256=a4929dc925e36d0e71a89e124ce85f0ada2ec5862708eb18c9136da35649ebc1\n",
|
| 716 |
+
" Stored in directory: /root/.cache/pip/wheels/87/a7/fd/8061e24ed08689045cb6d1ca303768dc463b20a5a338174841\n",
|
| 717 |
+
"Successfully built causal-conv1d antlr4-python3-runtime progress clip fasttext kaldi-python-io nemo_toolkit rouge-score sox distance docopt intervaltree asciitree cdifflib\n",
|
| 718 |
+
"Installing collected packages: trampoline, pydub, progress, plac, pangu, opencc, ninja, ijson, docopt, distance, clip, braceexpand, asciitree, antlr4-python3-runtime, aniso8601, addict, xxhash, webdataset, trimesh, textdistance, sox, smmap, shellingham, setproctitle, sentry-sdk, ruamel.yaml.clib, rapidfuzz, pytest-runner, pypinyin, pynini, pybind11, portalocker, pathspec, parameterized, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numcodecs, mypy-extensions, marshmallow, markdown2, loguru, lilcom, lightning-utilities, libcst, latexcodec, kornia-rs, kaldiio, kaldi-python-io, jmespath, jedi, isort, intervaltree, ftfy, fasteners, faiss-cpu, einops, docutils, docker-pycreds, dill, decord, cytoolz, colorama, click, cdifflib, attrdict, zarr, sacremoses, sacrebleu, ruamel.yaml, resampy, pytest-mock, pypinyin-dict, PyMCubes, pyloudnorm, pybtex, pyannote.core, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, Levenshtein, jiwer, hydra-core, gitdb, fiddle, fasttext, einops-exts, botocore, black, typer, texterrors, s3transfer, rouge-score, pybtex-docutils, nvidia-cusolver-cu12, lhotse, gitpython, flask-restful, diffusers, wandb, transformers, sphinxcontrib-bibtex, pyannote.database, g2p-en, datasets, boto3, torchsde, torchmetrics, torchdiffeq, sentence-transformers, pyannote.metrics, nerfacc, nemo_toolkit, nemo-text-processing, kornia, causal-conv1d, accelerated-scan, timm, pytorch-lightning, taming-transformers, open-clip-torch\n",
|
| 719 |
+
" Attempting uninstall: docutils\n",
|
| 720 |
+
" Found existing installation: docutils 0.18.1\n",
|
| 721 |
+
" Uninstalling docutils-0.18.1:\n",
|
| 722 |
+
" Successfully uninstalled docutils-0.18.1\n",
|
| 723 |
+
" Attempting uninstall: click\n",
|
| 724 |
+
" Found existing installation: click 8.1.7\n",
|
| 725 |
+
" Uninstalling click-8.1.7:\n",
|
| 726 |
+
" Successfully uninstalled click-8.1.7\n",
|
| 727 |
+
" Attempting uninstall: typer\n",
|
| 728 |
+
" Found existing installation: typer 0.9.4\n",
|
| 729 |
+
" Uninstalling typer-0.9.4:\n",
|
| 730 |
+
" Successfully uninstalled typer-0.9.4\n",
|
| 731 |
+
" Attempting uninstall: transformers\n",
|
| 732 |
+
" Found existing installation: transformers 4.41.0\n",
|
| 733 |
+
" Uninstalling transformers-4.41.0:\n",
|
| 734 |
+
" Successfully uninstalled transformers-4.41.0\n",
|
| 735 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
| 736 |
+
"spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
|
| 737 |
+
"weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
|
| 738 |
+
"\u001b[0mSuccessfully installed Levenshtein-0.22.0 PyMCubes-0.1.4 accelerated-scan-0.2.0 addict-2.4.0 aniso8601-9.0.1 antlr4-python3-runtime-4.9.3 asciitree-0.3.3 attrdict-2.0.1 black-24.4.2 boto3-1.34.113 botocore-1.34.113 braceexpand-0.1.7 causal-conv1d-1.2.2.post1 cdifflib-1.2.6 click-8.0.2 clip-0.2.0 colorama-0.4.6 cytoolz-0.12.3 datasets-2.19.1 decord-0.6.0 diffusers-0.28.0 dill-0.3.8 distance-0.1.3 docker-pycreds-0.4.0 docopt-0.6.2 docutils-0.17.1 einops-0.8.0 einops-exts-0.0.4 faiss-cpu-1.8.0 fasteners-0.19 fasttext-0.9.2 fiddle-0.3.0 flask-restful-0.3.10 ftfy-6.2.0 g2p-en-2.1.0 gitdb-4.0.11 gitpython-3.1.43 hydra-core-1.3.2 ijson-3.2.3 intervaltree-3.1.0 isort-5.13.2 jedi-0.19.1 jiwer-2.5.2 jmespath-1.0.1 kaldi-python-io-1.2.2 kaldiio-2.18.0 kornia-0.7.2 kornia-rs-0.1.3 latexcodec-3.0.0 lhotse-1.23.0 libcst-1.4.0 lightning-utilities-0.11.2 lilcom-1.7 loguru-0.7.2 markdown2-2.4.13 marshmallow-3.21.2 multiprocess-0.70.16 mypy-extensions-1.0.0 nemo-text-processing-1.0.2 nemo_toolkit-2.0.0rc1 nerfacc-0.5.3 ninja-1.11.1.1 numcodecs-0.12.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 open-clip-torch-2.24.0 opencc-1.1.6 pangu-4.0.6.1 parameterized-0.9.0 pathspec-0.12.1 plac-1.4.3 portalocker-2.8.2 progress-1.6 pyannote.core-5.0.0 pyannote.database-5.1.0 pyannote.metrics-3.2.1 pybind11-2.12.0 pybtex-0.24.0 pybtex-docutils-1.0.3 pydub-0.25.1 pyloudnorm-0.1.1 pynini-2.1.5 pypinyin-0.51.0 pypinyin-dict-0.8.0 pytest-mock-3.14.0 pytest-runner-6.0.1 pytorch-lightning-2.2.5 rapidfuzz-2.13.7 resampy-0.4.3 rouge-score-0.1.2 ruamel.yaml-0.18.6 ruamel.yaml.clib-0.2.8 s3transfer-0.10.1 sacrebleu-2.4.2 sacremoses-0.1.1 
sentence-transformers-2.7.0 sentry-sdk-2.3.1 setproctitle-1.3.3 shellingham-1.5.4 smmap-5.0.1 sox-1.5.0 sphinxcontrib-bibtex-2.6.2 taming-transformers-0.0.1 textdistance-4.6.2 texterrors-0.4.4 timm-1.0.3 torchdiffeq-0.2.3 torchmetrics-1.4.0.post0 torchsde-0.2.6 trampoline-0.1.2 transformers-4.40.2 trimesh-4.4.0 typer-0.12.3 wandb-0.17.0 webdataset-0.2.86 xxhash-3.4.1 zarr-2.18.2\n"
|
| 739 |
+
]
|
| 740 |
+
}
|
| 741 |
+
],
|
| 742 |
+
"source": [
|
| 743 |
+
"!pip install wget\n",
|
| 744 |
+
"!apt-get install sox libsndfile1 ffmpeg\n",
|
| 745 |
+
"\n",
|
| 746 |
+
"!python -m pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372#egg=nemo_toolkit[all]"
|
| 747 |
+
]
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"cell_type": "code",
|
| 751 |
+
"source": [
|
| 752 |
+
"import hydra\n",
|
| 753 |
+
"import soundfile as sf\n",
|
| 754 |
+
"import torch\n",
|
| 755 |
+
"from omegaconf import OmegaConf"
|
| 756 |
+
],
|
| 757 |
+
"metadata": {
|
| 758 |
+
"id": "cBz_fQ6KbzrZ"
|
| 759 |
+
},
|
| 760 |
+
"execution_count": null,
|
| 761 |
+
"outputs": []
|
| 762 |
+
},
|
| 763 |
+
{
|
| 764 |
+
"cell_type": "markdown",
|
| 765 |
+
"source": [
|
| 766 |
+
"### Downloading config, weights and audio example"
|
| 767 |
+
],
|
| 768 |
+
"metadata": {
|
| 769 |
+
"id": "3Fvy0phvhr0G"
|
| 770 |
+
}
|
| 771 |
+
},
|
| 772 |
+
{
|
| 773 |
+
"cell_type": "code",
|
| 774 |
+
"source": [
|
| 775 |
+
"import locale\n",
|
| 776 |
+
"\n",
|
| 777 |
+
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
|
| 778 |
+
"\n",
|
| 779 |
+
"# Loading weights, config and example wav for CTC-model\n",
|
| 780 |
+
"!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt\n",
|
| 781 |
+
"!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml\n",
|
| 782 |
+
"!wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav"
|
| 783 |
+
],
|
| 784 |
+
"metadata": {
|
| 785 |
+
"colab": {
|
| 786 |
+
"base_uri": "https://localhost:8080/"
|
| 787 |
+
},
|
| 788 |
+
"id": "0EHgk_I6hrGI",
|
| 789 |
+
"outputId": "4c9ac38d-eeca-4da4-af26-aa41becfed00"
|
| 790 |
+
},
|
| 791 |
+
"execution_count": null,
|
| 792 |
+
"outputs": [
|
| 793 |
+
{
|
| 794 |
+
"output_type": "stream",
|
| 795 |
+
"name": "stdout",
|
| 796 |
+
"text": [
|
| 797 |
+
"--2024-05-28 07:12:41-- https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav\n",
|
| 798 |
+
"Resolving n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)... 37.230.193.192\n",
|
| 799 |
+
"Connecting to n-ws-q0bez.s3pd12.sbercloud.ru (n-ws-q0bez.s3pd12.sbercloud.ru)|37.230.193.192|:443... connected.\n",
|
| 800 |
+
"HTTP request sent, awaiting response... 200 OK\n",
|
| 801 |
+
"Length: 361324 (353K) [application/octet-stream]\n",
|
| 802 |
+
"Saving to: ‘example.wav’\n",
|
| 803 |
+
"\n",
|
| 804 |
+
"example.wav 100%[===================>] 352.86K 583KB/s in 0.6s \n",
|
| 805 |
+
"\n",
|
| 806 |
+
"2024-05-28 07:12:42 (583 KB/s) - ‘example.wav’ saved [361324/361324]\n",
|
| 807 |
+
"\n"
|
| 808 |
+
]
|
| 809 |
+
}
|
| 810 |
+
]
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"cell_type": "markdown",
|
| 814 |
+
"source": [
|
| 815 |
+
"### Model instantiating and *inference*"
|
| 816 |
+
],
|
| 817 |
+
"metadata": {
|
| 818 |
+
"id": "FUA6Ah1blyHv"
|
| 819 |
+
}
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"cell_type": "code",
|
| 823 |
+
"source": [
|
| 824 |
+
"class SpecScaler(torch.nn.Module):\n",
|
| 825 |
+
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
|
| 826 |
+
" return torch.log(x.clamp_(1e-9, 1e9))\n",
|
| 827 |
+
"\n",
|
| 828 |
+
"\n",
|
| 829 |
+
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
|
| 830 |
+
"encoder_config = \"encoder_config.yaml\"\n",
|
| 831 |
+
"model_weights = \"ssl_model_weights.ckpt\"\n",
|
| 832 |
+
"audio_path = \"example.wav\"\n",
|
| 833 |
+
"\n",
|
| 834 |
+
"conf = OmegaConf.load(encoder_config)\n",
|
| 835 |
+
"\n",
|
| 836 |
+
"encoder = hydra.utils.instantiate(conf.encoder)\n",
|
| 837 |
+
"ckpt = torch.load(model_weights, map_location=\"cpu\")\n",
|
| 838 |
+
"encoder.load_state_dict(ckpt, strict=True)\n",
|
| 839 |
+
"encoder.to(device)\n",
|
| 840 |
+
"\n",
|
| 841 |
+
"feature_extractor = hydra.utils.instantiate(conf.feature_extractor)\n",
|
| 842 |
+
"\n",
|
| 843 |
+
"audio_signal, _ = sf.read(audio_path, dtype=\"float32\")\n",
|
| 844 |
+
"features = feature_extractor(torch.tensor(audio_signal).float())\n",
|
| 845 |
+
"features = features.to(device)\n",
|
| 846 |
+
"\n",
|
| 847 |
+
"encoded, _ = encoder.forward(\n",
|
| 848 |
+
" audio_signal=features.unsqueeze(0),\n",
|
| 849 |
+
" length=torch.tensor([features.shape[-1]]).to(device),\n",
|
| 850 |
+
")\n",
|
| 851 |
+
"print(f\"encoded signal shape: {encoded.shape}\")"
|
| 852 |
+
],
|
| 853 |
+
"metadata": {
|
| 854 |
+
"colab": {
|
| 855 |
+
"base_uri": "https://localhost:8080/"
|
| 856 |
+
},
|
| 857 |
+
"id": "AsUapeJKh3cz",
|
| 858 |
+
"outputId": "ee5bc82b-4526-4364-ef7c-decd59cdbc5f"
|
| 859 |
+
},
|
| 860 |
+
"execution_count": null,
|
| 861 |
+
"outputs": [
|
| 862 |
+
{
|
| 863 |
+
"output_type": "stream",
|
| 864 |
+
"name": "stdout",
|
| 865 |
+
"text": [
|
| 866 |
+
"encoded signal shape: torch.Size([1, 768, 283])\n"
|
| 867 |
+
]
|
| 868 |
+
}
|
| 869 |
+
]
|
| 870 |
+
},
|
| 871 |
+
{
|
| 872 |
+
"cell_type": "code",
|
| 873 |
+
"source": [],
|
| 874 |
+
"metadata": {
|
| 875 |
+
"id": "p1yWHEU5Dn60"
|
| 876 |
+
},
|
| 877 |
+
"execution_count": null,
|
| 878 |
+
"outputs": []
|
| 879 |
+
}
|
| 880 |
+
]
|
| 881 |
+
}
|
Examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Examples/rnnt_inference.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torchaudio
|
| 5 |
+
from nemo.collections.asr.models import EncDecRNNTBPEModel
|
| 6 |
+
from nemo.collections.asr.modules.audio_preprocessing import (
|
| 7 |
+
AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
|
| 8 |
+
)
|
| 9 |
+
from nemo.collections.asr.parts.preprocessing.features import (
|
| 10 |
+
FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
|
| 11 |
+
)
|
| 12 |
+
from omegaconf import OmegaConf, open_dict
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo torchaudio filterbank featurizer with a selectable mel scale.

    The parent constructor runs first; afterwards ``_mel_spec_extractor`` is
    replaced by a ``torchaudio.transforms.MelSpectrogram`` built from the same
    settings plus ``mel_scale`` and ``wkwargs``.

    Args:
        mel_scale: Mel scale name handed to ``MelSpectrogram`` (default ``"htk"``).
        wkwargs: Optional keyword arguments for the window function.
        **kwargs: Forwarded to the NeMo parent class. ``window_size`` and
            ``window_stride`` are removed first. ``nfilt``, ``window``,
            ``mel_norm`` and ``n_fft`` must be present (``KeyError`` otherwise);
            ``highfreq`` and ``lowfreq`` are optional.
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # These two keys are not forwarded to the parent constructor.
        for dropped_key in ("window_size", "window_stride"):
            kwargs.pop(dropped_key, None)

        super().__init__(**kwargs)

        # Collect the extractor settings in one place, then build it.
        extractor_settings = dict(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(
            **extractor_settings
        )
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses the local ``FilterbankFeaturesTA``.

    Args:
        mel_scale: Mel scale name passed on to ``FilterbankFeaturesTA``.
        **kwargs: Preprocessor settings. They are given unchanged to the NeMo
            parent constructor, then reused for the featurizer with the
            ``features`` key renamed to ``nfilt``.
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)

        # The featurizer takes the mel-bin count as ``nfilt`` rather than ``features``.
        kwargs["nfilt"] = kwargs.pop("features")
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _parse_args():
|
| 53 |
+
parser = argparse.ArgumentParser(
|
| 54 |
+
description="Run inference using GigaAM-RNNT checkpoint"
|
| 55 |
+
)
|
| 56 |
+
parser.add_argument(
|
| 57 |
+
"--model_config", help="Path to GigaAM-RNNT config file (.yaml)"
|
| 58 |
+
)
|
| 59 |
+
parser.add_argument(
|
| 60 |
+
"--model_weights", help="Path to GigaAM-RNNT checkpoint file (.ckpt)"
|
| 61 |
+
)
|
| 62 |
+
parser.add_argument("--tokenizer_path", help="Path to tokenizer directory")
|
| 63 |
+
parser.add_argument("--audio_path", help="Path to audio signal")
|
| 64 |
+
parser.add_argument("--device", help="Device: cpu / cuda")
|
| 65 |
+
return parser.parse_args()
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def main(
    model_config: str,
    model_weights: str,
    tokenizer_path: str,
    device: str,
    audio_path: str,
):
    """Build the RNNT model, load its weights and print one transcription.

    Args:
        model_config: Path to the model YAML config.
        model_weights: Path to the checkpoint passed to ``torch.load``.
        tokenizer_path: Directory written into ``config.tokenizer.dir``.
        device: Device the model is moved to.
        audio_path: Audio file handed to ``model.transcribe``.
    """
    config = OmegaConf.load(model_config)
    # open_dict is used so the tokenizer directory can be overridden in the loaded config.
    with open_dict(config):
        config.tokenizer.dir = tokenizer_path

    model = EncDecRNNTBPEModel.from_config_dict(config)

    # Weights are read onto the CPU first and loaded non-strictly.
    state = torch.load(model_weights, map_location="cpu")
    model.load_state_dict(state, strict=False)
    model = model.to(device)
    model.eval()

    results = model.transcribe([audio_path])
    transcription = results[0][0]
    print(f"transcription: {transcription}")
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
if __name__ == "__main__":
    # Every parsed option has the same name as a keyword parameter of main().
    main(**vars(_parse_args()))
|
Examples/rnnt_longform_inference.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
from typing import List, Tuple
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
import torchaudio
|
| 8 |
+
from nemo.collections.asr.models import EncDecRNNTBPEModel
|
| 9 |
+
from nemo.collections.asr.modules.audio_preprocessing import (
|
| 10 |
+
AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
|
| 11 |
+
)
|
| 12 |
+
from nemo.collections.asr.parts.preprocessing.features import (
|
| 13 |
+
FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
|
| 14 |
+
)
|
| 15 |
+
from omegaconf import OmegaConf, open_dict
|
| 16 |
+
from pyannote.audio import Pipeline
|
| 17 |
+
from pydub import AudioSegment
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo torchaudio filterbank featurizer with a selectable mel scale.

    Once the parent constructor has run, ``_mel_spec_extractor`` is rebuilt as a
    ``torchaudio.transforms.MelSpectrogram`` that also receives ``mel_scale``
    and ``wkwargs``.

    Args:
        mel_scale: Mel scale name handed to ``MelSpectrogram`` (default ``"htk"``).
        wkwargs: Optional keyword arguments for the window function.
        **kwargs: Forwarded to the NeMo parent class after ``window_size`` and
            ``window_stride`` are removed. ``nfilt``, ``window``, ``mel_norm``
            and ``n_fft`` are required; ``highfreq`` and ``lowfreq`` are optional.
    """

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        # These two keys are not forwarded to the parent constructor.
        for dropped_key in ("window_size", "window_stride"):
            kwargs.pop(dropped_key, None)

        super().__init__(**kwargs)

        # Collect the extractor settings in one place, then build it.
        extractor_settings = dict(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        self._mel_spec_extractor = torchaudio.transforms.MelSpectrogram(
            **extractor_settings
        )
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses the local ``FilterbankFeaturesTA``.

    Args:
        mel_scale: Mel scale name passed on to ``FilterbankFeaturesTA``.
        **kwargs: Preprocessor settings. They go unchanged to the NeMo parent
            constructor and are then reused for the featurizer, with the
            ``features`` key renamed to ``nfilt``.
    """

    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)

        # The featurizer takes the mel-bin count as ``nfilt`` rather than ``features``.
        kwargs["nfilt"] = kwargs.pop("features")
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def audiosegment_to_numpy(audiosegment: AudioSegment) -> np.ndarray:
    """Convert an AudioSegment to a float32 numpy array scaled by its sample width.

    The divisor is derived from ``audiosegment.sample_width`` (bytes per
    sample), so 16-bit input is still divided by 32768.0 while other widths get
    their own full-scale value. Interleaved samples are reshaped to
    ``(frames, channels)`` for any channel count above one, not only stereo.

    Args:
        audiosegment: Object exposing ``get_array_of_samples()``, ``channels``
            and ``sample_width``.

    Returns:
        C-ordered float32 array: 1-D for mono, ``(frames, channels)`` otherwise.
    """
    samples = np.array(audiosegment.get_array_of_samples())

    channels = audiosegment.channels
    if channels > 1:
        samples = samples.reshape((-1, channels))

    # Magnitude of the most negative signed sample for this width, e.g. 32768 for 2 bytes.
    full_scale = float(1 << (8 * audiosegment.sample_width - 1))
    return samples.astype(np.float32, order="C") / full_scale
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def format_time(seconds: float) -> str:
    """Render a duration in seconds as ``[HH:]MM:SS:cc``.

    The last field is hundredths of a second, truncated. The hours field is
    included only when it is greater than zero.
    """
    whole_hours = int(seconds // 3600)
    whole_minutes = int((seconds % 3600) // 60)
    within_minute = seconds % 60
    whole_seconds = int(within_minute)
    hundredths = int((within_minute - whole_seconds) * 100)

    fields = [whole_minutes, whole_seconds, hundredths]
    if whole_hours > 0:
        fields.insert(0, whole_hours)
    return ":".join(f"{field:02}" for field in fields)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def segment_audio(
    audio_path: str,
    pipeline: Pipeline,
    max_duration: float = 22.0,
    min_duration: float = 15.0,
    new_chunk_threshold: float = 0.2,
) -> Tuple[List[np.ndarray], List[List[float]]]:
    """Split a WAV file into speech chunks for ASR using a VAD pipeline.

    Consecutive speech regions reported by ``pipeline`` are merged into one
    chunk. The current chunk is closed when either (a) it is already longer
    than ``min_duration`` and the pause before the next region exceeds
    ``new_chunk_threshold``, or (b) extending it to the end of the next region
    would exceed ``max_duration``.

    Chunks with no audio are never emitted. Previously, a first speech region
    ending beyond ``max_duration`` triggered a flush of the still-empty initial
    chunk and produced an empty ``[0, 0]`` entry.

    NOTE(review): a single speech region longer than ``max_duration`` is not
    split, so a chunk can still exceed ``max_duration``.

    Args:
        audio_path: Path to a WAV file.
        pipeline: Callable VAD pipeline; its result must provide
            ``get_timeline().support()`` yielding items with ``start``/``end``.
        max_duration: Upper bound used when deciding to close a chunk (seconds).
        min_duration: Minimum chunk length before a pause may close it (seconds).
        new_chunk_threshold: Pause length that may close a chunk (seconds).

    Returns:
        ``(segments, boundaries)``: the chunk sample arrays and the matching
        ``[start, end]`` pairs, in the same time unit as the VAD output.
    """
    # Re-encode the audio into an in-memory WAV stream for the VAD pipeline.
    audio = AudioSegment.from_wav(audio_path)
    audio_bytes = BytesIO()
    audio.export(audio_bytes, format="wav")
    audio_bytes.seek(0)

    # Regions with detected speech activity.
    sad_segments = pipeline({"uri": "filename", "audio": audio_bytes})

    segments: List[np.ndarray] = []
    boundaries: List[List[float]] = []

    def _emit_chunk(chunk_start, chunk_end):
        # Nothing accumulated yet (or a degenerate span): do not emit an empty chunk.
        if chunk_end <= chunk_start:
            return
        # The audio object is sliced in units 1000x finer than the VAD times.
        segments.append(
            audiosegment_to_numpy(audio[chunk_start * 1000 : chunk_end * 1000])
        )
        boundaries.append([chunk_start, chunk_end])

    curr_duration = 0
    curr_start = 0
    curr_end = 0

    # Merge VAD regions into ASR chunks according to the min/max duration rules.
    for segment in sad_segments.get_timeline().support():
        start = max(0, segment.start)
        end = min(len(audio) / 1000, segment.end)

        pause_closes_chunk = (
            curr_duration > min_duration and start - curr_end > new_chunk_threshold
        )
        would_exceed_max = curr_duration + (end - curr_end) > max_duration
        if pause_closes_chunk or would_exceed_max:
            _emit_chunk(curr_start, curr_end)
            curr_start = start

        curr_end = end
        curr_duration = curr_end - curr_start

    # Flush whatever is left after the last region.
    _emit_chunk(curr_start, curr_end)

    return segments, boundaries
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def _parse_args():
|
| 130 |
+
parser = argparse.ArgumentParser(
|
| 131 |
+
description="Run long-form inference using GigaAM-RNNT checkpoint"
|
| 132 |
+
)
|
| 133 |
+
parser.add_argument(
|
| 134 |
+
"--model_config", help="Path to GigaAM-RNNT config file (.yaml)"
|
| 135 |
+
)
|
| 136 |
+
parser.add_argument(
|
| 137 |
+
"--model_weights", help="Path to GigaAM-RNNT checkpoint file (.ckpt)"
|
| 138 |
+
)
|
| 139 |
+
parser.add_argument("--tokenizer_path", help="Path to tokenizer directory")
|
| 140 |
+
parser.add_argument("--audio_path", help="Path to audio signal")
|
| 141 |
+
parser.add_argument(
|
| 142 |
+
"--hf_token", help="HuggingFace token for using pyannote Pipeline"
|
| 143 |
+
)
|
| 144 |
+
parser.add_argument("--device", help="Device: cpu / cuda")
|
| 145 |
+
parser.add_argument("--fp16", help="Run in FP16 mode", default=True)
|
| 146 |
+
parser.add_argument(
|
| 147 |
+
"--batch_size", help="Batch size for acoustic model inference", default=10
|
| 148 |
+
)
|
| 149 |
+
return parser.parse_args()
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def main(
    model_config: str,
    model_weights: str,
    tokenizer_path: str,
    device: str,
    audio_path: str,
    hf_token: str,
    fp16: bool,
    batch_size: int = 10,
):
    """Transcribe a long recording chunk by chunk and print timestamped text.

    Args:
        model_config: Path to the model YAML config.
        model_weights: Path to the checkpoint passed to ``torch.load``.
        tokenizer_path: Directory written into ``config.tokenizer.dir``.
        device: Device for both the ASR model and the VAD pipeline.
        audio_path: WAV file to segment and transcribe.
        hf_token: Token given to ``Pipeline.from_pretrained``.
        fp16: Use half precision when ``device`` is not ``"cpu"``.
        batch_size: Batch size passed to ``model.transcribe``.
    """
    # Build the ASR model and load its weights.
    config = OmegaConf.load(model_config)
    with open_dict(config):
        config.tokenizer.dir = tokenizer_path

    model = EncDecRNNTBPEModel.from_config_dict(config)
    model.load_state_dict(torch.load(model_weights, map_location="cpu"), strict=False)
    model = model.to(device)

    half_precision = device != "cpu" and fp16
    if half_precision:
        # Cast the model to half precision but put the preprocessor back in float32.
        model = model.half()
        model.preprocessor = model.preprocessor.float()
    model.eval()

    # Voice-activity-detection pipeline used to cut the recording into chunks.
    vad = Pipeline.from_pretrained(
        "pyannote/voice-activity-detection", use_auth_token=hf_token
    )
    vad = vad.to(torch.device(device))

    segments, boundaries = segment_audio(audio_path, vad)

    # Transcribe all chunks, under autocast when running in half precision.
    if half_precision:
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            transcriptions = model.transcribe(segments, batch_size=batch_size)[0]
    else:
        transcriptions = model.transcribe(segments, batch_size=batch_size)[0]

    for transcription, boundary in zip(transcriptions, boundaries):
        print(
            f"[{format_time(boundary[0])} - {format_time(boundary[1])}]: {transcription}\n"
        )
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
if __name__ == "__main__":
    # Every parsed option has the same name as a keyword parameter of main().
    main(**vars(_parse_args()))
|
Examples/ssl_inference.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
|
| 3 |
+
import hydra
|
| 4 |
+
import soundfile
|
| 5 |
+
import torch
|
| 6 |
+
from omegaconf import OmegaConf
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class SpecScaler(torch.nn.Module):
    """Log-compress a spectrogram after clamping it to ``[1e-9, 1e9]``."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``log(clamp(x, 1e-9, 1e9))`` without modifying ``x``.

        The clamp is done out of place, so the caller's tensor keeps its
        original values.
        """
        return torch.log(x.clamp(1e-9, 1e9))
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _parse_args():
|
| 15 |
+
parser = argparse.ArgumentParser(
|
| 16 |
+
description="Run inference using GigaAM checkpoint"
|
| 17 |
+
)
|
| 18 |
+
parser.add_argument("--encoder_config", help="Path to GigaAM config file (.yaml)")
|
| 19 |
+
parser.add_argument(
|
| 20 |
+
"--model_weights", help="Path to GigaAM checkpoint file (.ckpt)"
|
| 21 |
+
)
|
| 22 |
+
parser.add_argument("--audio_path", help="Path to audio signal")
|
| 23 |
+
parser.add_argument("--device", help="Device: cpu / cuda")
|
| 24 |
+
return parser.parse_args()
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def main(encoder_config: str, model_weights: str, device: str, audio_path: str):
    """Encode one audio file with the GigaAM encoder and print the output shape.

    The encoder is switched to evaluation mode and run under
    ``torch.no_grad()``, so train-only layers are inactive and no autograd
    graph is built.

    Args:
        encoder_config: YAML config with ``encoder`` and ``feature_extractor``
            sections that ``hydra.utils.instantiate`` can build.
        model_weights: Checkpoint passed to ``torch.load``; it must match the
            encoder exactly (``strict=True``).
        device: Device the encoder and the features are moved to.
        audio_path: Audio file read with ``soundfile``.
    """
    conf = OmegaConf.load(encoder_config)

    encoder = hydra.utils.instantiate(conf.encoder)
    ckpt = torch.load(model_weights, map_location="cpu")
    encoder.load_state_dict(ckpt, strict=True)
    encoder.to(device)
    # Inference only: put the module in evaluation mode.
    encoder.eval()

    feature_extractor = hydra.utils.instantiate(conf.feature_extractor)

    # NOTE(review): assumes a single-channel file at the sample rate the
    # feature extractor expects; the returned sample rate is ignored here.
    audio_signal, _ = soundfile.read(audio_path, dtype="float32")
    features = feature_extractor(torch.tensor(audio_signal).float())
    features = features.to(device)

    with torch.no_grad():
        encoded, _ = encoder.forward(
            audio_signal=features.unsqueeze(0),
            length=torch.tensor([features.shape[-1]]).to(device),
        )
    print(f"encoded signal shape: {encoded.shape}")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
if __name__ == "__main__":
    # Every parsed option has the same name as a keyword parameter of main().
    main(**vars(_parse_args()))
|
GigaAM-CTC/ctc_model_config.yaml
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_class: enc_dec_ctc_char
|
| 2 |
+
sample_rate: 16000
|
| 3 |
+
log_prediction: true
|
| 4 |
+
ctc_reduction: mean_batch
|
| 5 |
+
labels:
|
| 6 |
+
- ' '
|
| 7 |
+
- а
|
| 8 |
+
- б
|
| 9 |
+
- в
|
| 10 |
+
- г
|
| 11 |
+
- д
|
| 12 |
+
- е
|
| 13 |
+
- ж
|
| 14 |
+
- з
|
| 15 |
+
- и
|
| 16 |
+
- й
|
| 17 |
+
- к
|
| 18 |
+
- л
|
| 19 |
+
- м
|
| 20 |
+
- н
|
| 21 |
+
- о
|
| 22 |
+
- п
|
| 23 |
+
- р
|
| 24 |
+
- с
|
| 25 |
+
- т
|
| 26 |
+
- у
|
| 27 |
+
- ф
|
| 28 |
+
- х
|
| 29 |
+
- ц
|
| 30 |
+
- ч
|
| 31 |
+
- ш
|
| 32 |
+
- щ
|
| 33 |
+
- ъ
|
| 34 |
+
- ы
|
| 35 |
+
- ь
|
| 36 |
+
- э
|
| 37 |
+
- ю
|
| 38 |
+
- я
|
| 39 |
+
|
| 40 |
+
preprocessor:
|
| 41 |
+
_target_: __main__.AudioToMelSpectrogramPreprocessor
|
| 42 |
+
sample_rate: 16000
|
| 43 |
+
n_fft: 400
|
| 44 |
+
n_window_size: 400
|
| 45 |
+
window_size: null
|
| 46 |
+
n_window_stride: 160
|
| 47 |
+
window_stride: null
|
| 48 |
+
features: 64
|
| 49 |
+
dither: 0.0
|
| 50 |
+
preemph: null
|
| 51 |
+
log: true
|
| 52 |
+
log_zero_guard_type: clamp
|
| 53 |
+
normalize: null
|
| 54 |
+
pad_to: 0
|
| 55 |
+
mel_norm: null
|
| 56 |
+
window: hann
|
| 57 |
+
log_zero_guard_value: 1e-9
|
| 58 |
+
|
| 59 |
+
train_ds:
|
| 60 |
+
batch_size: 10
|
| 61 |
+
trim_silence: false
|
| 62 |
+
max_duration: 25.0
|
| 63 |
+
min_duration: 0.1
|
| 64 |
+
shuffle: true
|
| 65 |
+
is_tarred: false
|
| 66 |
+
num_workers: 8
|
| 67 |
+
pin_memory: true
|
| 68 |
+
manifest_filepath: null
|
| 69 |
+
labels:
|
| 70 |
+
- ' '
|
| 71 |
+
- а
|
| 72 |
+
- б
|
| 73 |
+
- в
|
| 74 |
+
- г
|
| 75 |
+
- д
|
| 76 |
+
- е
|
| 77 |
+
- ж
|
| 78 |
+
- з
|
| 79 |
+
- и
|
| 80 |
+
- й
|
| 81 |
+
- к
|
| 82 |
+
- л
|
| 83 |
+
- м
|
| 84 |
+
- н
|
| 85 |
+
- о
|
| 86 |
+
- п
|
| 87 |
+
- р
|
| 88 |
+
- с
|
| 89 |
+
- т
|
| 90 |
+
- у
|
| 91 |
+
- ф
|
| 92 |
+
- х
|
| 93 |
+
- ц
|
| 94 |
+
- ч
|
| 95 |
+
- ш
|
| 96 |
+
- щ
|
| 97 |
+
- ъ
|
| 98 |
+
- ы
|
| 99 |
+
- ь
|
| 100 |
+
- э
|
| 101 |
+
- ю
|
| 102 |
+
- я
|
| 103 |
+
|
| 104 |
+
validation_ds:
|
| 105 |
+
batch_size: 20
|
| 106 |
+
shuffle: false
|
| 107 |
+
num_workers: 4
|
| 108 |
+
min_duration: 0.1
|
| 109 |
+
pin_memory: true
|
| 110 |
+
manifest_filepath: null
|
| 111 |
+
labels:
|
| 112 |
+
- ' '
|
| 113 |
+
- а
|
| 114 |
+
- б
|
| 115 |
+
- в
|
| 116 |
+
- г
|
| 117 |
+
- д
|
| 118 |
+
- е
|
| 119 |
+
- ж
|
| 120 |
+
- з
|
| 121 |
+
- и
|
| 122 |
+
- й
|
| 123 |
+
- к
|
| 124 |
+
- л
|
| 125 |
+
- м
|
| 126 |
+
- н
|
| 127 |
+
- о
|
| 128 |
+
- п
|
| 129 |
+
- р
|
| 130 |
+
- с
|
| 131 |
+
- т
|
| 132 |
+
- у
|
| 133 |
+
- ф
|
| 134 |
+
- х
|
| 135 |
+
- ц
|
| 136 |
+
- ч
|
| 137 |
+
- ш
|
| 138 |
+
- щ
|
| 139 |
+
- ъ
|
| 140 |
+
- ы
|
| 141 |
+
- ь
|
| 142 |
+
- э
|
| 143 |
+
- ю
|
| 144 |
+
- я
|
| 145 |
+
|
| 146 |
+
test_ds:
|
| 147 |
+
manifest_filepath: null
|
| 148 |
+
batch_size: 100
|
| 149 |
+
shuffle: false
|
| 150 |
+
num_workers: 4
|
| 151 |
+
pin_memory: true
|
| 152 |
+
labels:
|
| 153 |
+
- ' '
|
| 154 |
+
- а
|
| 155 |
+
- б
|
| 156 |
+
- в
|
| 157 |
+
- г
|
| 158 |
+
- д
|
| 159 |
+
- е
|
| 160 |
+
- ж
|
| 161 |
+
- з
|
| 162 |
+
- и
|
| 163 |
+
- й
|
| 164 |
+
- к
|
| 165 |
+
- л
|
| 166 |
+
- м
|
| 167 |
+
- н
|
| 168 |
+
- о
|
| 169 |
+
- п
|
| 170 |
+
- р
|
| 171 |
+
- с
|
| 172 |
+
- т
|
| 173 |
+
- у
|
| 174 |
+
- ф
|
| 175 |
+
- х
|
| 176 |
+
- ц
|
| 177 |
+
- ч
|
| 178 |
+
- ш
|
| 179 |
+
- щ
|
| 180 |
+
- ъ
|
| 181 |
+
- ы
|
| 182 |
+
- ь
|
| 183 |
+
- э
|
| 184 |
+
- ю
|
| 185 |
+
- я
|
| 186 |
+
spec_augment:
|
| 187 |
+
_target_: nemo.collections.asr.modules.SpectrogramAugmentation
|
| 188 |
+
freq_masks: 2
|
| 189 |
+
time_masks: 10
|
| 190 |
+
freq_width: 27
|
| 191 |
+
time_width: 0.05
|
| 192 |
+
encoder:
|
| 193 |
+
_target_: nemo.collections.asr.modules.ConformerEncoder
|
| 194 |
+
feat_in: 64
|
| 195 |
+
feat_out: -1
|
| 196 |
+
n_layers: 16
|
| 197 |
+
d_model: 768
|
| 198 |
+
subsampling: striding
|
| 199 |
+
subsampling_factor: 4
|
| 200 |
+
subsampling_conv_channels: 768
|
| 201 |
+
ff_expansion_factor: 4
|
| 202 |
+
self_attention_model: rel_pos
|
| 203 |
+
pos_emb_max_len: 5000
|
| 204 |
+
n_heads: 16
|
| 205 |
+
xscaling: false
|
| 206 |
+
untie_biases: true
|
| 207 |
+
conv_kernel_size: 31
|
| 208 |
+
dropout: 0.1
|
| 209 |
+
dropout_emb: 0.1
|
| 210 |
+
dropout_att: 0.1
|
| 211 |
+
decoder:
|
| 212 |
+
_target_: nemo.collections.asr.modules.ConvASRDecoder
|
| 213 |
+
feat_in: 768
|
| 214 |
+
num_classes: 33
|
| 215 |
+
vocabulary:
|
| 216 |
+
- ' '
|
| 217 |
+
- а
|
| 218 |
+
- б
|
| 219 |
+
- в
|
| 220 |
+
- г
|
| 221 |
+
- д
|
| 222 |
+
- е
|
| 223 |
+
- ж
|
| 224 |
+
- з
|
| 225 |
+
- и
|
| 226 |
+
- й
|
| 227 |
+
- к
|
| 228 |
+
- л
|
| 229 |
+
- м
|
| 230 |
+
- н
|
| 231 |
+
- о
|
| 232 |
+
- п
|
| 233 |
+
- р
|
| 234 |
+
- с
|
| 235 |
+
- т
|
| 236 |
+
- у
|
| 237 |
+
- ф
|
| 238 |
+
- х
|
| 239 |
+
- ц
|
| 240 |
+
- ч
|
| 241 |
+
- ш
|
| 242 |
+
- щ
|
| 243 |
+
- ъ
|
| 244 |
+
- ы
|
| 245 |
+
- ь
|
| 246 |
+
- э
|
| 247 |
+
- ю
|
| 248 |
+
- я
|
| 249 |
+
optim:
|
| 250 |
+
name: adamw
|
| 251 |
+
lr: 5.0e-05
|
| 252 |
+
betas:
|
| 253 |
+
- 0.9
|
| 254 |
+
- 0.98
|
| 255 |
+
weight_decay: 0.01
|
| 256 |
+
sched:
|
| 257 |
+
name: CosineAnnealing
|
| 258 |
+
warmup_steps: 10000
|
| 259 |
+
warmup_ratio: null
|
| 260 |
+
min_lr: 1.0e-07
|
| 261 |
+
nemo_version: 1.12.0
|
| 262 |
+
decoding:
|
| 263 |
+
strategy: greedy
|
| 264 |
+
preserve_alignments: null
|
| 265 |
+
compute_timestamps: null
|
| 266 |
+
word_seperator: ' '
|
| 267 |
+
ctc_timestamp_type: all
|
| 268 |
+
batch_dim_index: 0
|
| 269 |
+
greedy:
|
| 270 |
+
preserve_alignments: false
|
| 271 |
+
compute_timestamps: false
|
GigaAM-CTC/ctc_model_weights.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6813e52607414d6006ac30a392087cb8d716afce7e0319a38bcb744ba741d2dc
|
| 3 |
+
size 968535213
|
GigaAM-Emo/emo_model_config.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id2name:
|
| 2 |
+
- 'angry'
|
| 3 |
+
- 'sad'
|
| 4 |
+
- 'neutral'
|
| 5 |
+
- 'positive'
|
| 6 |
+
|
| 7 |
+
feature_extractor:
|
| 8 |
+
_target_: torch.nn.Sequential
|
| 9 |
+
_args_:
|
| 10 |
+
- _target_: torchaudio.transforms.MelSpectrogram
|
| 11 |
+
sample_rate: 16000
|
| 12 |
+
n_fft: 400
|
| 13 |
+
win_length: 400
|
| 14 |
+
hop_length: 160
|
| 15 |
+
n_mels: 64
|
| 16 |
+
- _target_: __main__.SpecScaler
|
| 17 |
+
|
| 18 |
+
encoder:
|
| 19 |
+
_target_: nemo.collections.asr.modules.ConformerEncoder
|
| 20 |
+
feat_in: 64
|
| 21 |
+
feat_out: -1
|
| 22 |
+
n_layers: 16
|
| 23 |
+
d_model: 768
|
| 24 |
+
subsampling: striding
|
| 25 |
+
subsampling_factor: 4
|
| 26 |
+
subsampling_conv_channels: 768
|
| 27 |
+
ff_expansion_factor: 4
|
| 28 |
+
self_attention_model: rel_pos
|
| 29 |
+
pos_emb_max_len: 5000
|
| 30 |
+
n_heads: 16
|
| 31 |
+
xscaling: false
|
| 32 |
+
untie_biases: true
|
| 33 |
+
conv_kernel_size: 31
|
| 34 |
+
|
| 35 |
+
classification_head:
|
| 36 |
+
_target_: torch.nn.Linear
|
| 37 |
+
in_features: 768
|
| 38 |
+
out_features: 4
|
GigaAM-Emo/emo_model_weights.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a8530d7573e0f0cd78c48c91345bd67c09a8eb4b15913baab77590140b9ecb0
|
| 3 |
+
size 968409626
|
GigaAM-RNNT/rnnt_model_config.yaml
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_class: enc_dec_rnnt_bpe
|
| 2 |
+
sample_rate: 16000
|
| 3 |
+
log_prediction: true
|
| 4 |
+
model_defaults:
|
| 5 |
+
enc_hidden: 768
|
| 6 |
+
pred_hidden: 320
|
| 7 |
+
join_hidden: 320
|
| 8 |
+
|
| 9 |
+
preprocessor:
|
| 10 |
+
_target_: __main__.AudioToMelSpectrogramPreprocessor
|
| 11 |
+
sample_rate: 16000
|
| 12 |
+
n_fft: 400
|
| 13 |
+
n_window_size: 400
|
| 14 |
+
window_size: null
|
| 15 |
+
n_window_stride: 160
|
| 16 |
+
window_stride: null
|
| 17 |
+
features: 64
|
| 18 |
+
dither: 0.0
|
| 19 |
+
preemph: null
|
| 20 |
+
log: true
|
| 21 |
+
log_zero_guard_type: clamp
|
| 22 |
+
normalize: null
|
| 23 |
+
pad_to: 0
|
| 24 |
+
mel_norm: null
|
| 25 |
+
window: hann
|
| 26 |
+
log_zero_guard_value: 1e-9
|
| 27 |
+
|
| 28 |
+
tokenizer:
|
| 29 |
+
dir: tokenizer_all_sets/
|
| 30 |
+
type: bpe
|
| 31 |
+
|
| 32 |
+
validation_ds:
|
| 33 |
+
shuffle: False
|
| 34 |
+
manifest_filepath: null
|
| 35 |
+
|
| 36 |
+
encoder:
|
| 37 |
+
_target_: nemo.collections.asr.modules.ConformerEncoder
|
| 38 |
+
feat_in: 64
|
| 39 |
+
feat_out: -1
|
| 40 |
+
n_layers: 16
|
| 41 |
+
d_model: 768
|
| 42 |
+
subsampling: striding
|
| 43 |
+
subsampling_factor: 4
|
| 44 |
+
subsampling_conv_channels: 768
|
| 45 |
+
ff_expansion_factor: 4
|
| 46 |
+
self_attention_model: rel_pos
|
| 47 |
+
pos_emb_max_len: 5000
|
| 48 |
+
n_heads: 16
|
| 49 |
+
xscaling: false
|
| 50 |
+
untie_biases: true
|
| 51 |
+
conv_kernel_size: 31
|
| 52 |
+
dropout: 0.1
|
| 53 |
+
dropout_emb: 0.1
|
| 54 |
+
dropout_att: 0.1
|
| 55 |
+
decoder:
|
| 56 |
+
_target_: nemo.collections.asr.modules.RNNTDecoder
|
| 57 |
+
normalization_mode: null
|
| 58 |
+
random_state_sampling: false
|
| 59 |
+
blank_as_pad: true
|
| 60 |
+
vocab_size: 512
|
| 61 |
+
prednet:
|
| 62 |
+
pred_hidden: 320
|
| 63 |
+
pred_rnn_layers: 1
|
| 64 |
+
t_max: null
|
| 65 |
+
dropout: 0.0
|
| 66 |
+
joint:
|
| 67 |
+
_target_: nemo.collections.asr.modules.RNNTJoint
|
| 68 |
+
log_softmax: null
|
| 69 |
+
fuse_loss_wer: false
|
| 70 |
+
fused_batch_size: 1
|
| 71 |
+
jointnet:
|
| 72 |
+
joint_hidden: 320
|
| 73 |
+
activation: relu
|
| 74 |
+
dropout: 0.0
|
| 75 |
+
encoder_hidden: 768
|
| 76 |
+
optim:
|
| 77 |
+
name: adamw
|
| 78 |
+
lr: 5.0e-05
|
| 79 |
+
betas:
|
| 80 |
+
- 0.9
|
| 81 |
+
- 0.98
|
| 82 |
+
weight_decay: 0.01
|
| 83 |
+
sched:
|
| 84 |
+
name: CosineAnnealing
|
| 85 |
+
warmup_steps: 10000
|
| 86 |
+
warmup_ratio: null
|
| 87 |
+
min_lr: 1.0e-07
|
| 88 |
+
nemo_version: 1.12.0
|
| 89 |
+
decoding:
|
| 90 |
+
strategy: greedy_batch
|
| 91 |
+
preserve_alignments: false
|
| 92 |
+
greedy:
|
| 93 |
+
max_symbols: 3
|
| 94 |
+
beam:
|
| 95 |
+
beam_size: 5
|
| 96 |
+
score_norm: true
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
loss:
|
| 100 |
+
loss_name: default
|
| 101 |
+
mwer: false
|
| 102 |
+
rnnt_reduction: mean_batch
|
| 103 |
+
wer_coef: false
|
| 104 |
+
subtract_mean: true
|
| 105 |
+
warprnnt_numba_kwargs:
|
| 106 |
+
fastemit_lambda: 0.0
|
| 107 |
+
clamp: -1.0
|
| 108 |
+
rnnt_weight: 0.1
|
| 109 |
+
unique_hyp: true
|
GigaAM-RNNT/rnnt_model_weights.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9311712a085aba1b103c325f4965faa7b32e950bf0b724720103a94d204d2a9
|
| 3 |
+
size 974419733
|
GigaAM/encoder_config.yaml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
feature_extractor:
|
| 2 |
+
_target_: torch.nn.Sequential
|
| 3 |
+
_args_:
|
| 4 |
+
- _target_: torchaudio.transforms.MelSpectrogram
|
| 5 |
+
sample_rate: 16000
|
| 6 |
+
n_fft: 400
|
| 7 |
+
win_length: 400
|
| 8 |
+
hop_length: 160
|
| 9 |
+
n_mels: 64
|
| 10 |
+
- _target_: __main__.SpecScaler
|
| 11 |
+
|
| 12 |
+
encoder:
|
| 13 |
+
_target_: nemo.collections.asr.modules.ConformerEncoder
|
| 14 |
+
feat_in: 64
|
| 15 |
+
feat_out: -1
|
| 16 |
+
n_layers: 16
|
| 17 |
+
d_model: 768
|
| 18 |
+
subsampling: striding
|
| 19 |
+
subsampling_factor: 4
|
| 20 |
+
subsampling_conv_channels: 768
|
| 21 |
+
ff_expansion_factor: 4
|
| 22 |
+
self_attention_model: rel_pos
|
| 23 |
+
pos_emb_max_len: 5000
|
| 24 |
+
n_heads: 16
|
| 25 |
+
xscaling: false
|
| 26 |
+
untie_biases: true
|
| 27 |
+
conv_kernel_size: 31
|
GigaAM/ssl_model_weights.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fea2e9cee640c931a159667c9f1d82519e789087966ed412c77c0b7e69a35073
|
| 3 |
+
size 968385941
|
README.md
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GigaAM: the family of open-source acoustic models for speech processing
|
| 2 |
+
|
| 3 |
+

|
| 4 |
+
|
| 5 |
+
## Table of contents
|
| 6 |
+
|
| 7 |
+
* [GigaAM](#gigaam)
|
| 8 |
+
* [GigaAM for Speech Recognition](#gigaam-for-speech-recognition)
|
| 9 |
+
* [GigaAM-CTC](#gigaam-ctc)
|
| 10 |
+
* [GigaAM-RNNT](#gigaam-rnnt)
|
| 11 |
+
* [GigaAM-Emo](#gigaam-emo)
|
| 12 |
+
* [Links](#links)
|
| 13 |
+
|
| 14 |
+
## GigaAM
|
| 15 |
+
|
| 16 |
+
GigaAM (**Giga** **A**coustic **M**odel) is a [Conformer](https://arxiv.org/pdf/2005.08100.pdf)-based [wav2vec2](https://arxiv.org/pdf/2006.11477.pdf) foundational model (around 240M parameters). We trained GigaAM on nearly 50 thousand hours of diversified speech audio in the Russian language.
|
| 17 |
+
|
| 18 |
+
Resources:
|
| 19 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt)
|
| 20 |
+
* [Encoder config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml)
|
| 21 |
+
* [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Model_Usage_Example.ipynb)
|
| 22 |
+
* [Docker example](./Examples/README.md)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
## GigaAM for Speech Recognition
|
| 26 |
+
|
| 27 |
+
We fine-tuned the GigaAM encoder for Speech Recognition with two different decoders:
|
| 28 |
+
* GigaAM-CTC was fine-tuned with [Connectionist Temporal Classification](https://www.cs.toronto.edu/~graves/icml_2006.pdf) and a character-based tokenizer.
|
| 29 |
+
* GigaAM-RNNT was fine-tuned with [RNN Transducer loss](https://arxiv.org/abs/1211.3711) and a subword tokenizer.
|
| 30 |
+
|
| 31 |
+
Both models were trained using [the NeMo toolkit](https://github.com/NVIDIA/NeMo) on publicly available Russian labeled data:
|
| 32 |
+
|
| 33 |
+
| dataset | size, hours | weight |
|
| 34 |
+
| --- | --- | --- |
|
| 35 |
+
| [Golos](https://arxiv.org/pdf/2106.10161.pdf) | 1227 | 0.6 |
|
| 36 |
+
| [SOVA](https://github.com/sovaai/sova-dataset) | 369 | 0.2 |
|
| 37 |
+
| [Russian Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | 207 | 0.1 |
|
| 38 |
+
| [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) | 93 | 0.1 |
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
Resources:
|
| 42 |
+
* ### GigaAM-CTC:
|
| 43 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_weights.ckpt)
|
| 44 |
+
* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_config.yaml)
|
| 45 |
+
* [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb)
|
| 46 |
+
* [Docker example](./Examples/README.md)
|
| 47 |
+
* ### GigaAM-RNNT:
|
| 48 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt)
|
| 49 |
+
* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml)
|
| 50 |
+
* [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb)
|
| 51 |
+
* [Docker example](./Examples/README.md)
|
| 52 |
+
|
| 53 |
+
The following table summarizes the performance of different models in terms of Word Error Rate on open Russian datasets:
|
| 54 |
+
|
| 55 |
+
| model | parameters | [Golos Crowd](https://arxiv.org/abs/2106.10161) | [Golos Farfield](https://arxiv.org/abs/2106.10161) | [OpenSTT Youtube](https://github.com/snakers4/open_stt) | [OpenSTT Phone calls](https://github.com/snakers4/open_stt) | [OpenSTT Audiobooks](https://github.com/snakers4/open_stt) | [Mozilla Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) |
|
| 56 |
+
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
| 57 |
+
| [Whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) | 1.5B | 17.4 | 14.5 | 21.1 | 31.2 | 17.0 | 5.3 | 9.0 |
|
| 58 |
+
| [NVIDIA Ru-FastConformer-RNNT](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc) | 115M | 2.6 | 6.6 | 23.8 | 32.9 | 16.4 | 2.7 | 11.6 |
|
| 59 |
+
| GigaAM-CTC | 242M | 3.1 | 5.7 | 18.4 | 25.6 | 15.1 | 1.7 | 8.1 |
|
| 60 |
+
| GigaAM-RNNT | 243M | <span style="color:green">2.3</span> | <span style="color:green">4.4</span> | <span style="color:green">16.7</span> | <span style="color:green">22.9</span> | <span style="color:green">13.9</span> | <span style="color:green">0.9</span> | <span style="color:green">7.4</span> |
|
| 61 |
+
|
| 62 |
+
## GigaAM-Emo
|
| 63 |
+
|
| 64 |
+
GigaAM-Emo is an acoustic model for Emotion Recognition. We fine-tuned the GigaAM Encoder on the [Dusha](https://arxiv.org/pdf/2212.12266.pdf) dataset.
|
| 65 |
+
|
| 66 |
+
Resources:
|
| 67 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt)
|
| 68 |
+
* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml)
|
| 69 |
+
* [Colab example](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb)
|
| 70 |
+
* [Docker example](./Examples/README.md)
|
| 71 |
+
|
| 72 |
+
The following table summarizes the performance of different models on the [Dusha](https://arxiv.org/pdf/2212.12266.pdf) dataset:
|
| 73 |
+
|
| 74 |
+
| | | Crowd | | | Podcast | |
|
| 75 |
+
| --- | --- | --- | --- | --- | --- | --- |
|
| 76 |
+
| | Unweighted Accuracy | Weighted Accuracy | Macro F1-score | Unweighted Accuracy | Weighted Accuracy | Macro F1-score |
|
| 77 |
+
| [DUSHA](https://arxiv.org/pdf/2212.12266.pdf) baseline <br/> ([MobileNetV2](https://arxiv.org/abs/1801.04381) + [Self-Attention](https://arxiv.org/pdf/1805.08318.pdf)) | 0.83 | 0.76 | 0.77 | 0.89 | 0.53 | 0.54 |
|
| 78 |
+
| [АБК](https://aij.ru/archive?albumId=2&videoId=337) ([TIM-Net](https://arxiv.org/pdf/2211.08233.pdf)) | 0.84 | 0.77 | 0.78 | <span style="color:green">0.90</span> | 0.50 | 0.55 |
|
| 79 |
+
| GigaAM-Emo | <span style="color:green">0.90</span> | <span style="color:green">0.87</span> | <span style="color:green">0.84</span> | <span style="color:green">0.90</span> | <span style="color:green">0.76</span> | <span style="color:green">0.67</span> |
|
| 80 |
+
|
| 81 |
+
## Links
|
| 82 |
+
* [[habr] GigaAM: класс открытых моделей для обработки звучащей речи](https://habr.com/ru/companies/sberdevices/articles/805569)
|
| 83 |
+
* [[youtube] GigaAM: Семейство акустических моделей для русского языка](https://youtu.be/PvZuTUnZa2Q?t=26442)
|
| 84 |
+
* [[youtube] Speech-only Pre-training: обучение универсального аудиоэнкодера](https://www.youtube.com/watch?v=ktO4Mx6UMNk)
|
README_ru.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GigaAM: семейство акустических моделей для обработки звучащей речи
|
| 2 |
+
|
| 3 |
+

|
| 4 |
+
|
| 5 |
+
## Содержание
|
| 6 |
+
|
| 7 |
+
* [GigaAM](#gigaam)
|
| 8 |
+
* [GigaAM для распознавания речи](#gigaam-для-распознавания-речи)
|
| 9 |
+
* [GigaAM-CTC](#gigaam-ctc)
|
| 10 |
+
* [GigaAM-RNNT](#gigaam-rnnt)
|
| 11 |
+
* [GigaAM-Emo](#gigaam-emo)
|
| 12 |
+
* [Ссылки](#ссылки)
|
| 13 |
+
|
| 14 |
+
## GigaAM
|
| 15 |
+
|
| 16 |
+
GigaAM (**Giga** **A**coustic **M**odel) — фундаментальная акустическая модель, основанная на [Conformer](https://arxiv.org/pdf/2005.08100.pdf) энкодере (около 240M параметров). Мы предобучали GigaAM в [wav2vec2](https://arxiv.org/pdf/2006.11477.pdf) режиме на 50 тысячах часов разнообразных русскоязычных данных.
|
| 17 |
+
|
| 18 |
+
Материалы:
|
| 19 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ssl_model_weights.ckpt)
|
| 20 |
+
* [Encoder config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/encoder_config.yaml)
|
| 21 |
+
* [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Model_Usage_Example.ipynb)
|
| 22 |
+
* [Пример использования в docker](./Examples/README.md)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
## GigaAM для распознавания речи
|
| 26 |
+
Мы дообучали GigaAM энкодер для задачи распознавания речи с двумя разными декодерами:
|
| 27 |
+
* GigaAM-CTC была дообучена с [CTC](https://www.cs.toronto.edu/~graves/icml_2006.pdf) функцией потерь и посимвольной токенизацией.
|
| 28 |
+
* GigaAM-RNNT была дообучена с [RNN-T](https://arxiv.org/abs/1211.3711) функцией потерь и subword-токенизацией.
|
| 29 |
+
|
| 30 |
+
Для обучения обеих моделей использовался [фреймворк NeMo](https://github.com/NVIDIA/NeMo) и следующие открытые данные:
|
| 31 |
+
|
| 32 |
+
| dataset | size, hours | weight |
|
| 33 |
+
| --- | --- | --- |
|
| 34 |
+
| [Golos](https://arxiv.org/pdf/2106.10161.pdf) | 1227 | 0.6 |
|
| 35 |
+
| [SOVA](https://github.com/sovaai/sova-dataset) | 369 | 0.2 |
|
| 36 |
+
| [Russian Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | 207 | 0.1 |
|
| 37 |
+
| [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) | 93 | 0.1 |
|
| 38 |
+
|
| 39 |
+
Материалы:
|
| 40 |
+
* ### GigaAM-CTC:
|
| 41 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_weights.ckpt)
|
| 42 |
+
* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_config.yaml)
|
| 43 |
+
* [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_CTC_Model_Usage_Example.ipynb)
|
| 44 |
+
* [Пример использования в docker](./Examples/README.md)
|
| 45 |
+
* ### GigaAM-RNNT:
|
| 46 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt)
|
| 47 |
+
* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml)
|
| 48 |
+
* [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_RNNT_Model_Usage_Example.ipynb)
|
| 49 |
+
* [Пример использования в docker](./Examples/README.md)
|
| 50 |
+
|
| 51 |
+
В таблице ниже приведены оценки Word Error Rate различных моделей на открытых русскоязычных наборах данных:
|
| 52 |
+
|
| 53 |
+
| model | parameters | [Golos Crowd](https://arxiv.org/abs/2106.10161) | [Golos Farfield](https://arxiv.org/abs/2106.10161) | [OpenSTT Youtube](https://github.com/snakers4/open_stt) | [OpenSTT Phone calls](https://github.com/snakers4/open_stt) | [OpenSTT Audiobooks](https://github.com/snakers4/open_stt) | [Mozilla Common Voice](https://arxiv.org/pdf/1912.06670.pdf) | [Russian LibriSpeech](https://arxiv.org/pdf/2012.03411.pdf) |
|
| 54 |
+
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
| 55 |
+
| [Whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) | 1.5B | 17.4 | 14.5 | 21.1 | 31.2 | 17.0 | 5.3 | 9.0 |
|
| 56 |
+
| [NVIDIA Ru-FastConformer-RNNT](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc) | 115M | 2.6 | 6.6 | 23.8 | 32.9 | 16.4 | 2.7 | 11.6 |
|
| 57 |
+
| GigaAM-CTC | 242M | 3.1 | 5.7 | 18.4 | 25.6 | 15.1 | 1.7 | 8.1 |
|
| 58 |
+
| GigaAM-RNNT | 243M | <span style="color:green">2.3</span> | <span style="color:green">4.4</span> | <span style="color:green">16.7</span> | <span style="color:green">22.9</span> | <span style="color:green">13.9</span> | <span style="color:green">0.9</span> | <span style="color:green">7.4</span> |
|
| 59 |
+
|
| 60 |
+
## GigaAM-Emo
|
| 61 |
+
|
| 62 |
+
GigaAM-Emo — акустическая модель для определения эмоций. Мы доучивали GigaAM на датасете [Dusha](https://arxiv.org/pdf/2212.12266.pdf).
|
| 63 |
+
|
| 64 |
+
Материалы:
|
| 65 |
+
* [Model weights](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_weights.ckpt)
|
| 66 |
+
* [Model config](https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/emo_model_config.yaml)
|
| 67 |
+
* [Пример использования в colab](https://colab.research.google.com/github/salute-developers/GigaAM/blob/main/examples/notebooks/GigaAM_Emo_Model_Usage_Example.ipynb)
|
| 68 |
+
* [Пример использования в docker](./Examples/README.md)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
В таблице ниже приведены метрики качества открытых моделей на датасете [Dusha](https://arxiv.org/pdf/2212.12266.pdf):
|
| 72 |
+
|
| 73 |
+
| | | Crowd | | | Podcast | |
|
| 74 |
+
| --- | --- | --- | --- | --- | --- | --- |
|
| 75 |
+
| | Unweighted Accuracy | Weighted Accuracy | Macro F1-score | Unweighted Accuracy | Weighted Accuracy | Macro F1-score |
|
| 76 |
+
| [DUSHA](https://arxiv.org/pdf/2212.12266.pdf) baseline <br/> ([MobileNetV2](https://arxiv.org/abs/1801.04381) + [Self-Attention](https://arxiv.org/pdf/1805.08318.pdf)) | 0.83 | 0.76 | 0.77 | 0.89 | 0.53 | 0.54 |
|
| 77 |
+
| [АБК](https://aij.ru/archive?albumId=2&videoId=337) ([TIM-Net](https://arxiv.org/pdf/2211.08233.pdf)) | 0.84 | 0.77 | 0.78 | <span style="color:green">0.90</span> | 0.50 | 0.55 |
|
| 78 |
+
| GigaAM-Emo | <span style="color:green">0.90</span> | <span style="color:green">0.87</span> | <span style="color:green">0.84</span> | <span style="color:green">0.90</span> | <span style="color:green">0.76</span> | <span style="color:green">0.67</span> |
|
| 79 |
+
|
| 80 |
+
## Ссылки
|
| 81 |
+
* [[habr] GigaAM: класс открытых моделей для обработки звучащей речи](https://habr.com/ru/companies/sberdevices/articles/805569)
|
| 82 |
+
* [[youtube] GigaAM: Семейство акустических моделей для русского языка](https://youtu.be/PvZuTUnZa2Q?t=26442)
|
| 83 |
+
* [[youtube] Speech-only Pre-training: обучение универсального аудиоэнкодера](https://www.youtube.com/watch?v=ktO4Mx6UMNk)
|
gigaam_scheme.svg
ADDED
|
|