Commit
·
3e667ed
1
Parent(s):
7873cbe
service_request: accommodate requests for text synthesis
Browse files
vc_service_request.py → service_request.py
RENAMED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
Copyright 2023 Balacoon
|
| 3 |
|
| 4 |
contains implementation
|
| 5 |
-
for
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
|
@@ -12,7 +12,7 @@ import hashlib
|
|
| 12 |
import json
|
| 13 |
import ssl
|
| 14 |
import time
|
| 15 |
-
from typing import Tuple
|
| 16 |
|
| 17 |
import numpy as np
|
| 18 |
import resampy
|
|
@@ -62,18 +62,23 @@ def create_signature(api_secret: str) -> str:
|
|
| 62 |
return signature
|
| 63 |
|
| 64 |
|
| 65 |
-
async def async_service_request(source: np.ndarray, target: np.ndarray, api_key: str, api_secret: str) -> np.ndarray:
|
| 66 |
ssl_context = ssl.create_default_context()
|
| 67 |
|
| 68 |
async with websockets.connect(
|
| 69 |
os.environ["endpoint"], close_timeout=1024, ssl=ssl_context
|
| 70 |
) as websocket:
|
| 71 |
request_dict = {
|
| 72 |
-
"source": base64.b64encode(source.tobytes()).decode("utf-8"),
|
| 73 |
"target": base64.b64encode(target.tobytes()).decode("utf-8"),
|
| 74 |
"api_key": api_key,
|
| 75 |
"signature": create_signature(api_secret),
|
| 76 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
request = json.dumps(request_dict)
|
| 78 |
await websocket.send(request)
|
| 79 |
|
|
@@ -81,7 +86,7 @@ async def async_service_request(source: np.ndarray, target: np.ndarray, api_key:
|
|
| 81 |
result_lst = []
|
| 82 |
while True:
|
| 83 |
try:
|
| 84 |
-
data = await asyncio.wait_for(websocket.recv(), timeout=
|
| 85 |
result_lst.append(np.frombuffer(data, dtype="int16"))
|
| 86 |
except websockets.exceptions.ConnectionClosed:
|
| 87 |
break
|
|
@@ -93,21 +98,29 @@ async def async_service_request(source: np.ndarray, target: np.ndarray, api_key:
|
|
| 93 |
return result
|
| 94 |
|
| 95 |
|
| 96 |
-
def
|
| 97 |
-
source_audio: Tuple[int, np.ndarray], target_audio: Tuple[int, np.ndarray],
|
| 98 |
api_key: str, api_secret: str,
|
| 99 |
) -> Tuple[int, np.ndarray]:
|
| 100 |
"""
|
| 101 |
prepares audio (has to be 16khz mono)
|
| 102 |
and runs request to a voice conversion service
|
| 103 |
"""
|
| 104 |
-
src =
|
|
|
|
|
|
|
| 105 |
tgt = prepare_audio(target_audio)
|
| 106 |
-
if
|
| 107 |
return
|
| 108 |
-
if
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
return
|
| 111 |
|
| 112 |
-
res = asyncio.run(async_service_request(src, tgt, api_key, api_secret))
|
| 113 |
return 16000, res
|
|
|
|
| 2 |
Copyright 2023 Balacoon
|
| 3 |
|
| 4 |
contains implementation
|
| 5 |
+
for Revoice request
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
|
|
|
| 12 |
import json
|
| 13 |
import ssl
|
| 14 |
import time
|
| 15 |
+
from typing import Tuple, Union
|
| 16 |
|
| 17 |
import numpy as np
|
| 18 |
import resampy
|
|
|
|
| 62 |
return signature
|
| 63 |
|
| 64 |
|
| 65 |
+
async def async_service_request(source_str: str, source: np.ndarray, target: np.ndarray, api_key: str, api_secret: str) -> np.ndarray:
|
| 66 |
ssl_context = ssl.create_default_context()
|
| 67 |
|
| 68 |
async with websockets.connect(
|
| 69 |
os.environ["endpoint"], close_timeout=1024, ssl=ssl_context
|
| 70 |
) as websocket:
|
| 71 |
request_dict = {
|
|
|
|
| 72 |
"target": base64.b64encode(target.tobytes()).decode("utf-8"),
|
| 73 |
"api_key": api_key,
|
| 74 |
"signature": create_signature(api_secret),
|
| 75 |
}
|
| 76 |
+
if source_str is not None:
|
| 77 |
+
request_dict["source_str"] = source_str
|
| 78 |
+
elif source is not None:
|
| 79 |
+
request_dict["source"] = base64.b64encode(source.tobytes()).decode("utf-8")
|
| 80 |
+
else:
|
| 81 |
+
return None
|
| 82 |
request = json.dumps(request_dict)
|
| 83 |
await websocket.send(request)
|
| 84 |
|
|
|
|
| 86 |
result_lst = []
|
| 87 |
while True:
|
| 88 |
try:
|
| 89 |
+
data = await asyncio.wait_for(websocket.recv(), timeout=30)
|
| 90 |
result_lst.append(np.frombuffer(data, dtype="int16"))
|
| 91 |
except websockets.exceptions.ConnectionClosed:
|
| 92 |
break
|
|
|
|
| 98 |
return result
|
| 99 |
|
| 100 |
|
| 101 |
+
def service_request(
    source_str: str, source_audio: Tuple[int, np.ndarray], target_audio: Tuple[int, np.ndarray],
    api_key: str, api_secret: str,
) -> Tuple[int, np.ndarray]:
    """
    prepares audio (has to be 16khz mono)
    and runs request to a voice conversion service

    The source can be given either as text (`source_str`) or as audio
    (`source_audio`); either of them may be None, but not both.

    Parameters
    ----------
    source_str: str
        text to synthesize, or None when the source is audio.
        At most 256 characters are accepted.
    source_audio: Tuple[int, np.ndarray]
        source audio, or None when the source is text. It is passed
        through `prepare_audio` and must be shorter than 60 seconds.
    target_audio: Tuple[int, np.ndarray]
        target audio. It is passed through `prepare_audio`
        and must be shorter than 30 seconds.
    api_key: str
        key sent along with the request
    api_secret: str
        secret used to sign the request

    Returns
    -------
    result: Tuple[int, np.ndarray]
        (16000, samples) as returned by the service, or None if
        the inputs are missing or exceed the limits above.
    """
    sample_rate = 16000
    max_target_samples = 30 * sample_rate
    max_source_samples = 60 * sample_rate
    max_source_chars = 256

    src = None
    if source_audio is not None:
        src = prepare_audio(source_audio)
    tgt = prepare_audio(target_audio)

    if tgt is None:
        return None
    if source_str is None and src is None:
        # neither text nor audio is available as a source
        return None
    if len(tgt) >= max_target_samples:
        # too long
        return None
    if src is not None and len(src) >= max_source_samples:
        return None
    if source_str is not None and len(source_str) > max_source_chars:
        return None

    # fix: the original passed an undefined name `src_str` here, which raised
    # NameError for every request that got past the validation above.
    res = asyncio.run(async_service_request(source_str, src, tgt, api_key, api_secret))
    return sample_rate, res
|