Commit · 3ebe5da
1 Parent(s): 9c7fd5c
More fixes to handling stereo, add note on wave
Browse files
- app.py +3 -0
- vc_service_request.py +10 -2
app.py
CHANGED
|
@@ -47,6 +47,9 @@ def main():
|
|
| 47 |
2. Select an audio sample that represents the target voice you want to convert to.
|
| 48 |
3. Click the "Convert" button and listen to the result!
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
If you are interested to plug in Voice Conversion
|
| 51 |
service into your own application, don't hesitate to get in touch with us at
|
| 52 |
[contact@balacoon.com](mailto:contact@balacoon.com)
|
|
|
|
| 47 |
2. Select an audio sample that represents the target voice you want to convert to.
|
| 48 |
3. Click the "Convert" button and listen to the result!
|
| 49 |
|
| 50 |
+
If providing your own audio files, please use WAVE PCM.
|
| 51 |
+
Service works with 16kHz, 16 bit, mono audio.
|
| 52 |
+
|
| 53 |
If you are interested to plug in Voice Conversion
|
| 54 |
service into your own application, don't hesitate to get in touch with us at
|
| 55 |
[contact@balacoon.com](mailto:contact@balacoon.com)
|
vc_service_request.py
CHANGED
|
@@ -35,8 +35,14 @@ def prepare_audio(audio: Tuple[int, np.ndarray]) -> np.ndarray:
|
|
| 35 |
|
| 36 |
if wav.ndim == 2:
|
| 37 |
# average channels
|
| 38 |
-
wav
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# ensure proper sampling rate
|
| 41 |
if sr != 16000:
|
| 42 |
wav = (wav / 32768.0).astype(np.float)
|
|
@@ -94,6 +100,8 @@ def vc_service_request(
|
|
| 94 |
"""
|
| 95 |
src = prepare_audio(source_audio)
|
| 96 |
tgt = prepare_audio(target_audio)
|
|
|
|
|
|
|
| 97 |
if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
|
| 98 |
# input is way too long, dont return anything
|
| 99 |
return
|
|
|
|
| 35 |
|
| 36 |
if wav.ndim == 2:
|
| 37 |
# average channels
|
| 38 |
+
if wav.shape[0] == 2:
|
| 39 |
+
wav = np.mean(wav, axis=0, keepdims=False)
|
| 40 |
+
if wav.shape[1] == 2:
|
| 41 |
+
wav = np.mean(wav, axis=1, keepdims=False)
|
| 42 |
+
|
| 43 |
+
if wav.ndim != 1:
|
| 44 |
+
return None
|
| 45 |
+
|
| 46 |
# ensure proper sampling rate
|
| 47 |
if sr != 16000:
|
| 48 |
wav = (wav / 32768.0).astype(np.float)
|
|
|
|
| 100 |
"""
|
| 101 |
src = prepare_audio(source_audio)
|
| 102 |
tgt = prepare_audio(target_audio)
|
| 103 |
+
if not src or not tgt:
|
| 104 |
+
return
|
| 105 |
if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
|
| 106 |
# input is way too long, dont return anything
|
| 107 |
return
|