Spaces: Running on Zero
Commit · b3ea40b
1 Parent(s): 91deaa2
update code for speech tokenizer
Browse files
app.py
CHANGED
|
@@ -21,10 +21,10 @@ auth_token = os.environ.get("TOKEN_FROM_SECRET")
|
|
| 21 |
# Audio part
|
| 22 |
##########################################
|
| 23 |
from huggingface_hub import snapshot_download
|
| 24 |
-
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer",
|
| 25 |
|
| 26 |
-
from
|
| 27 |
-
from
|
| 28 |
|
| 29 |
####################
|
| 30 |
# S2U
|
|
@@ -35,19 +35,20 @@ unit_type = '40ms_multilingual_8888'
|
|
| 35 |
language = 'English'
|
| 36 |
s2u_model_name = 'SPIRAL-FSQ-CTC'
|
| 37 |
|
| 38 |
-
ckpt_path, config_path =
|
| 39 |
-
s2u_model =
|
| 40 |
|
| 41 |
####################
|
| 42 |
# U2S
|
| 43 |
####################
|
| 44 |
-
condition2style_centroid_file = "./
|
| 45 |
condition2style_centroid_file_dict, condition2style_centroid_embedding_dict = load_condition_centroid(condition2style_centroid_file)
|
| 46 |
|
| 47 |
unit_type = '40ms_multilingual_8888_xujing_cosyvoice_FT'
|
| 48 |
language = 'Chinese'
|
| 49 |
-
model_config_file, model_checkpoint_file =
|
| 50 |
net_g, hps = load_U2S_model(model_config_file, model_checkpoint_file, unit_type)
|
|
|
|
| 51 |
|
| 52 |
####################
|
| 53 |
# task format
|
|
|
|
| 21 |
# Audio part
|
| 22 |
##########################################
|
| 23 |
from huggingface_hub import snapshot_download
|
| 24 |
+
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", token=auth_token)
|
| 25 |
|
| 26 |
+
from emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
| 27 |
+
from emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
| 28 |
|
| 29 |
####################
|
| 30 |
# S2U
|
|
|
|
| 35 |
language = 'English'
|
| 36 |
s2u_model_name = 'SPIRAL-FSQ-CTC'
|
| 37 |
|
| 38 |
+
ckpt_path, config_path = get_S2U_ckpt_config_path(unit_type, language)
|
| 39 |
+
s2u_model = load_S2U_model(ckpt_path, config_path, s2u_model_name).cuda()
|
| 40 |
|
| 41 |
####################
|
| 42 |
# U2S
|
| 43 |
####################
|
| 44 |
+
condition2style_centroid_file = "./speech_tokenization/condition_style_centroid/condition2style_centroid.txt"
|
| 45 |
condition2style_centroid_file_dict, condition2style_centroid_embedding_dict = load_condition_centroid(condition2style_centroid_file)
|
| 46 |
|
| 47 |
unit_type = '40ms_multilingual_8888_xujing_cosyvoice_FT'
|
| 48 |
language = 'Chinese'
|
| 49 |
+
model_config_file, model_checkpoint_file = get_U2S_config_checkpoint_file(unit_type, language)
|
| 50 |
net_g, hps = load_U2S_model(model_config_file, model_checkpoint_file, unit_type)
|
| 51 |
+
net_g = net_g.cuda()
|
| 52 |
|
| 53 |
####################
|
| 54 |
# task format
|