hifigan_for_sherpa
Browse files- .gitattributes +15 -0
- hifigan_for_sherpa/.gitattributes +35 -0
- hifigan_for_sherpa/README.md +20 -0
- hifigan_for_sherpa/hifigan_universal_22050hz_v1.onnx +3 -0
- hifigan_for_sherpa/hifigan_vctk_22050hz_v1.onnx +3 -0
- hifigan_for_sherpa/hifigan_vctk_22050hz_v2.onnx +3 -0
- hifigan_for_sherpa/hifigan_vctk_22050hz_v3.onnx +3 -0
- hifigan_for_sherpa/khadijah_LJ_v2_vocoder.wav +3 -0
- hifigan_for_sherpa/khadijah_vctk_v2_vocoder.wav +3 -0
- hifigan_for_sherpa/musa_LJ_v2_vocoder.wav +3 -0
- hifigan_for_sherpa/musa_vctk_v2_vocoder.wav +3 -0
- hifigan_for_sherpa/pretrained/LJ_FT_T2_V1/config.json +38 -0
- hifigan_for_sherpa/pretrained/LJ_FT_T2_V1/generator_v1 +3 -0
- hifigan_for_sherpa/pretrained/LJ_FT_T2_V2/config.json +38 -0
- hifigan_for_sherpa/pretrained/LJ_FT_T2_V2/generator_v2 +3 -0
- hifigan_for_sherpa/pretrained/LJ_FT_T2_V3/config.json +38 -0
- hifigan_for_sherpa/pretrained/LJ_FT_T2_V3/generator_v3 +3 -0
- hifigan_for_sherpa/pretrained/LJ_V1/config.json +38 -0
- hifigan_for_sherpa/pretrained/LJ_V1/generator_v1 +3 -0
- hifigan_for_sherpa/pretrained/LJ_V2/config.json +38 -0
- hifigan_for_sherpa/pretrained/LJ_V2/generator_v2 +3 -0
- hifigan_for_sherpa/pretrained/LJ_V3/config.json +38 -0
- hifigan_for_sherpa/pretrained/LJ_V3/generator_v3 +3 -0
- hifigan_for_sherpa/pretrained/UNIVERSAL_V1/config.json +37 -0
- hifigan_for_sherpa/pretrained/UNIVERSAL_V1/do_02500000 +3 -0
- hifigan_for_sherpa/pretrained/UNIVERSAL_V1/g_02500000 +3 -0
- hifigan_for_sherpa/pretrained/VCTK_V1/config.json +38 -0
- hifigan_for_sherpa/pretrained/VCTK_V1/generator_v1 +3 -0
- hifigan_for_sherpa/pretrained/VCTK_V2/config.json +38 -0
- hifigan_for_sherpa/pretrained/VCTK_V2/generator_v2 +3 -0
- hifigan_for_sherpa/pretrained/VCTK_V3/config.json +38 -0
- hifigan_for_sherpa/pretrained/VCTK_V3/generator_v3 +3 -0
- hifigan_for_sherpa/source.txt +1 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
hifigan_for_sherpa/khadijah_LJ_v2_vocoder.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
hifigan_for_sherpa/khadijah_vctk_v2_vocoder.wav filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
hifigan_for_sherpa/musa_LJ_v2_vocoder.wav filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
hifigan_for_sherpa/musa_vctk_v2_vocoder.wav filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
hifigan_for_sherpa/pretrained/LJ_FT_T2_V1/generator_v1 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
hifigan_for_sherpa/pretrained/LJ_FT_T2_V2/generator_v2 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
hifigan_for_sherpa/pretrained/LJ_FT_T2_V3/generator_v3 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
hifigan_for_sherpa/pretrained/LJ_V1/generator_v1 filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
hifigan_for_sherpa/pretrained/LJ_V2/generator_v2 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
hifigan_for_sherpa/pretrained/LJ_V3/generator_v3 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
hifigan_for_sherpa/pretrained/UNIVERSAL_V1/do_02500000 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
hifigan_for_sherpa/pretrained/UNIVERSAL_V1/g_02500000 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
hifigan_for_sherpa/pretrained/VCTK_V1/generator_v1 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
hifigan_for_sherpa/pretrained/VCTK_V2/generator_v2 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
hifigan_for_sherpa/pretrained/VCTK_V3/generator_v3 filter=lfs diff=lfs merge=lfs -text
|
hifigan_for_sherpa/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
hifigan_for_sherpa/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc0-1.0
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
# بسم الله
|
| 6 |
+
|
| 7 |
+
ATTENTION: HIFIGAN IS RETIRED. USE VOCOS INSTEAD.
|
| 8 |
+
|
| 9 |
+
1) Converted the universal v1 and VCTK v1, v2, v3 22050 Hz HiFi-GAN models from [here](https://github.com/jik876/hifi-gan) and [here](https://drive.google.com/drive/folders/1-eEYTB5Av9jNql0WGBlRoi-WH2J7bp5Y)
|
| 10 |
+
|
| 11 |
+
2) Converted to onnx using [this](https://github.com/k2-fsa/icefall/blob/master/egs/ljspeech/TTS/matcha/export_onnx_hifigan.py)
|
| 12 |
+
|
| 13 |
+
3) Added metadata like the following, using the above Python script:
|
| 14 |
+
{'model_type': 'hifigan', 'model_filename': 'hifigan_universal_22050hz_v1', 'sample_rate': 22050, 'version': 1, 'model_author': 'jik876', 'maintainer': 'k2-fsa', 'dataset': 'Universal', 'url1': 'https://github.com/jik876/hifi-gan', 'url2': ''}
|
| 15 |
+
|
| 16 |
+
Note 1: HiFi-GAN v2 is much smaller and faster, and needs less memory.
|
| 17 |
+
|
| 18 |
+
Note 2: As LJ Speech is a single-speaker female dataset, HiFi-GAN models trained on it do not work well on male voices. The VCTK models work well for both genders.
|
| 19 |
+
|
| 20 |
+
Conclusion: Use vctk_v2!
|
hifigan_for_sherpa/hifigan_universal_22050hz_v1.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61dcee7065555aadd136dab133a8a9182a490706f52721cd7acfbc74871e88c5
|
| 3 |
+
size 55750084
|
hifigan_for_sherpa/hifigan_vctk_22050hz_v1.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7479d71687759848ca3d3bc277fc73dd6cc0bec5f014929ae5982999fa0a48e1
|
| 3 |
+
size 55750074
|
hifigan_for_sherpa/hifigan_vctk_22050hz_v2.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a6240d23ee9f6c76e060aed1351d185c7e189cc8c8ffa4f9382ddd44a18b779
|
| 3 |
+
size 3749664
|
hifigan_for_sherpa/hifigan_vctk_22050hz_v3.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4e651fe2bbb3858d36d193a511b9977db164f6451bf66abae40c96bdfb073af
|
| 3 |
+
size 5863679
|
hifigan_for_sherpa/khadijah_LJ_v2_vocoder.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7232b5dcbdc9d11d0077b0504a85410358e6275786678ff3d43a9bb53ab428cf
|
| 3 |
+
size 164988
|
hifigan_for_sherpa/khadijah_vctk_v2_vocoder.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:011363f5f86c91974940574a3328cdd816cd2423ca389f04eef4d59d30ad5a03
|
| 3 |
+
size 165690
|
hifigan_for_sherpa/musa_LJ_v2_vocoder.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:588dbd69c7cb1d9b79243d3a38560b468685a776ea549936ab91af383b24176f
|
| 3 |
+
size 166748
|
hifigan_for_sherpa/musa_vctk_v2_vocoder.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:059774656edaede3a5affc2ae94a3697627fca3d78d2153576c8d113e80a09a6
|
| 3 |
+
size 167196
|
hifigan_for_sherpa/pretrained/LJ_FT_T2_V1/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0004,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,2,2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
| 13 |
+
"upsample_initial_channel": 512,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
"resblock_initial_channel": 256,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/LJ_FT_T2_V1/generator_v1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64aa99598e561596c69cb86f738890a8400fac97d367159ba6ee1bbb9e348cde
|
| 3 |
+
size 55788858
|
hifigan_for_sherpa/pretrained/LJ_FT_T2_V2/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0004,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,2,2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
| 13 |
+
"upsample_initial_channel": 128,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
"resblock_initial_channel": 64,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/LJ_FT_T2_V2/generator_v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d3aff09cc4cc060a804b8efea4093315c331eec5f81e8df094989c49a1fbec7
|
| 3 |
+
size 3758167
|
hifigan_for_sherpa/pretrained/LJ_FT_T2_V3/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "2",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0004,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,4],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,8],
|
| 13 |
+
"upsample_initial_channel": 256,
|
| 14 |
+
"resblock_kernel_sizes": [3,5,7],
|
| 15 |
+
"resblock_dilation_sizes": [[1,2], [2,6], [3,12]],
|
| 16 |
+
"resblock_initial_channel": 128,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/LJ_FT_T2_V3/generator_v3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd94d0dc55ae761dde16e7f304be04435ba5f96ccadecff433ba5531da4bc319
|
| 3 |
+
size 5870081
|
hifigan_for_sherpa/pretrained/LJ_V1/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,2,2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
| 13 |
+
"upsample_initial_channel": 512,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
"resblock_initial_channel": 256,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/LJ_V1/generator_v1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb4b0cb7f9df59b8e57bb2e51a1bede57b43e9f0454863e3971c491f255505e4
|
| 3 |
+
size 55788858
|
hifigan_for_sherpa/pretrained/LJ_V2/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,2,2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
| 13 |
+
"upsample_initial_channel": 128,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
"resblock_initial_channel": 64,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/LJ_V2/generator_v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fac378c5918fb2c102733f21eeaa8e9a4ca6cda24dbfddc55bbb947c78d562f
|
| 3 |
+
size 3758167
|
hifigan_for_sherpa/pretrained/LJ_V3/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "2",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,4],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,8],
|
| 13 |
+
"upsample_initial_channel": 256,
|
| 14 |
+
"resblock_kernel_sizes": [3,5,7],
|
| 15 |
+
"resblock_dilation_sizes": [[1,2], [2,6], [3,12]],
|
| 16 |
+
"resblock_initial_channel": 128,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/LJ_V3/generator_v3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5e89fc0c45924525b7bd0c974aaf8b55aa8e0f9115a83356632d5aa11b8a554
|
| 3 |
+
size 5870081
|
hifigan_for_sherpa/pretrained/UNIVERSAL_V1/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,2,2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
| 13 |
+
"upsample_initial_channel": 512,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
|
| 17 |
+
"segment_size": 8192,
|
| 18 |
+
"num_mels": 80,
|
| 19 |
+
"num_freq": 1025,
|
| 20 |
+
"n_fft": 1024,
|
| 21 |
+
"hop_size": 256,
|
| 22 |
+
"win_size": 1024,
|
| 23 |
+
|
| 24 |
+
"sampling_rate": 22050,
|
| 25 |
+
|
| 26 |
+
"fmin": 0,
|
| 27 |
+
"fmax": 8000,
|
| 28 |
+
"fmax_for_loss": null,
|
| 29 |
+
|
| 30 |
+
"num_workers": 4,
|
| 31 |
+
|
| 32 |
+
"dist_config": {
|
| 33 |
+
"dist_backend": "nccl",
|
| 34 |
+
"dist_url": "tcp://localhost:54321",
|
| 35 |
+
"world_size": 1
|
| 36 |
+
}
|
| 37 |
+
}
|
hifigan_for_sherpa/pretrained/UNIVERSAL_V1/do_02500000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9418cde901d2faed35376f1841e07ddbad6dc760abb4292d795e02df05f7b5cd
|
| 3 |
+
size 960411640
|
hifigan_for_sherpa/pretrained/UNIVERSAL_V1/g_02500000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:771eaf4876485a35e25577563d390c262e23c2421e4a8c929eacfde34a5b7a60
|
| 3 |
+
size 55788858
|
hifigan_for_sherpa/pretrained/VCTK_V1/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,2,2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
| 13 |
+
"upsample_initial_channel": 512,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
"resblock_initial_channel": 256,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/VCTK_V1/generator_v1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27cdeb835874516f9404d6c1a9ea229b092fd98c329b8444f5955c24cf7b29a1
|
| 3 |
+
size 55788858
|
hifigan_for_sherpa/pretrained/VCTK_V2/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,2,2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
| 13 |
+
"upsample_initial_channel": 128,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
"resblock_initial_channel": 64,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/VCTK_V2/generator_v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:049ff085eeae70ff0e1add39cfd4caad0fcca4511d259a84f68a0d1c1bbb2659
|
| 3 |
+
size 3758167
|
hifigan_for_sherpa/pretrained/VCTK_V3/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "2",
|
| 3 |
+
"num_gpus": 0,
|
| 4 |
+
"batch_size": 16,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [8,8,4],
|
| 12 |
+
"upsample_kernel_sizes": [16,16,8],
|
| 13 |
+
"upsample_initial_channel": 256,
|
| 14 |
+
"resblock_kernel_sizes": [3,5,7],
|
| 15 |
+
"resblock_dilation_sizes": [[1,2], [2,6], [3,12]],
|
| 16 |
+
"resblock_initial_channel": 128,
|
| 17 |
+
|
| 18 |
+
"segment_size": 8192,
|
| 19 |
+
"num_mels": 80,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft": 1024,
|
| 22 |
+
"hop_size": 256,
|
| 23 |
+
"win_size": 1024,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 22050,
|
| 26 |
+
|
| 27 |
+
"fmin": 0,
|
| 28 |
+
"fmax": 8000,
|
| 29 |
+
"fmax_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 4,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54322",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
hifigan_for_sherpa/pretrained/VCTK_V3/generator_v3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac38c592b20783ee5447269920ef8c79c13c79265258a4fecf89ef978cdd7caa
|
| 3 |
+
size 5870081
|
hifigan_for_sherpa/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/mah92/hifigan_for_sherpa
|