update
Browse files- app.py +38 -4
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -8,6 +8,9 @@ import traceback
|
|
| 8 |
import edge_tts
|
| 9 |
import gradio as gr
|
| 10 |
import librosa
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
from src.rmvpe import RMVPE
|
| 13 |
from model_loader import ModelLoader
|
|
@@ -39,9 +42,34 @@ rmvpe_model = RMVPE(
|
|
| 39 |
|
| 40 |
model_loader.load("char2")
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def tts(
|
| 44 |
-
rvc,
|
|
|
|
| 45 |
speed,
|
| 46 |
pitch,
|
| 47 |
tts_text,
|
|
@@ -61,6 +89,8 @@ def tts(
|
|
| 61 |
print(f"tts_voice: {tts_voice}")
|
| 62 |
print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
| 63 |
|
|
|
|
|
|
|
| 64 |
try:
|
| 65 |
if limitation and len(tts_text) > 280:
|
| 66 |
print("Error: Text too long")
|
|
@@ -86,9 +116,11 @@ def tts(
|
|
| 86 |
)
|
| 87 |
t1 = time.time()
|
| 88 |
edge_time = t1 - t0
|
| 89 |
-
|
| 90 |
-
# audio_opt = f.read()
|
| 91 |
if not rvc:
|
|
|
|
|
|
|
|
|
|
| 92 |
info = f"Success. Time: edge-tts: {edge_time}s"
|
| 93 |
print(info)
|
| 94 |
return (
|
|
@@ -206,7 +238,8 @@ with app:
|
|
| 206 |
label="Input Text",
|
| 207 |
value="I'm Never Gonna Give You Up",
|
| 208 |
)
|
| 209 |
-
rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=
|
|
|
|
| 210 |
with gr.Column():
|
| 211 |
but0 = gr.Button("Convert", variant="primary")
|
| 212 |
info_text = gr.Textbox(label="Output info")
|
|
@@ -216,6 +249,7 @@ with app:
|
|
| 216 |
tts,
|
| 217 |
[
|
| 218 |
rvc,
|
|
|
|
| 219 |
speed,
|
| 220 |
pitch,
|
| 221 |
tts_text,
|
|
|
|
| 8 |
import edge_tts
|
| 9 |
import gradio as gr
|
| 10 |
import librosa
|
| 11 |
+
import numpy as np
|
| 12 |
+
from pydub import AudioSegment
|
| 13 |
+
from scipy.io import wavfile
|
| 14 |
|
| 15 |
from src.rmvpe import RMVPE
|
| 16 |
from model_loader import ModelLoader
|
|
|
|
| 42 |
|
| 43 |
model_loader.load("char2")
|
| 44 |
|
| 45 |
+
def add_robotic_effect(mp3_path, *, delay=0.05, alpha=0.55, output_path="processed.wav"):
    """Mix one delayed, attenuated copy of an MP3 into itself and save it as WAV.

    The audio is decoded with pydub, down-mixed to mono by averaging the
    channels, summed with a copy of itself that is shifted by ``delay``
    seconds and scaled by ``alpha``, clipped to the int16 range and written
    to ``output_path`` as 16-bit PCM.

    Args:
        mp3_path: Path of the MP3 file to process.
        delay: Offset of the echoed copy, in seconds. Must not be negative.
        alpha: Gain applied to the echoed copy before it is mixed in.
        output_path: Where the resulting WAV file is written.

    Returns:
        ``output_path``, i.e. the path of the WAV file that was written.

    Raises:
        ValueError: If ``delay`` is negative.
    """
    if delay < 0:
        raise ValueError("delay must be non-negative")

    audio = AudioSegment.from_mp3(mp3_path)
    sample_rate = audio.frame_rate

    # Do the arithmetic in float64: adding the echo to the dry signal in
    # int16 wraps around on loud passages *before* the clip below can act,
    # which turns peaks into harsh sign-flipped glitches.
    # NOTE(review): the int16 clip below assumes the decoded audio is 16-bit
    # (audio.sample_width == 2) - confirm for non-default decoder settings.
    samples = np.array(audio.get_array_of_samples(), dtype=np.float64)

    # Multi-channel samples arrive interleaved frame by frame; average each
    # frame to get a mono signal.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels)).mean(axis=1)

    delay_samples = int(delay * sample_rate)

    # Dry signal plus the attenuated copy shifted right by delay_samples.
    # Slicing by the echo length (rather than a negative index) stays valid
    # when the delay rounds down to zero samples or exceeds the clip length.
    mixed = samples.copy()
    echo_length = len(samples) - delay_samples
    if echo_length > 0:
        mixed[delay_samples:] += samples[:echo_length] * alpha

    # Saturate to the int16 range, then convert for 16-bit PCM output.
    int16_limits = np.iinfo(np.int16)
    mixed = np.clip(mixed, int16_limits.min, int16_limits.max)
    wavfile.write(output_path, sample_rate, mixed.astype(np.int16))
    return output_path
|
| 69 |
|
| 70 |
def tts(
|
| 71 |
+
rvc,
|
| 72 |
+
effect,
|
| 73 |
speed,
|
| 74 |
pitch,
|
| 75 |
tts_text,
|
|
|
|
| 89 |
print(f"tts_voice: {tts_voice}")
|
| 90 |
print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
| 91 |
|
| 92 |
+
edge_output_filename = "edge_output.mp3"
|
| 93 |
+
|
| 94 |
try:
|
| 95 |
if limitation and len(tts_text) > 280:
|
| 96 |
print("Error: Text too long")
|
|
|
|
| 116 |
)
|
| 117 |
t1 = time.time()
|
| 118 |
edge_time = t1 - t0
|
| 119 |
+
|
|
|
|
| 120 |
if not rvc:
|
| 121 |
+
if effect:
|
| 122 |
+
edge_output_filename = add_robotic_effect(edge_output_filename)
|
| 123 |
+
|
| 124 |
info = f"Success. Time: edge-tts: {edge_time}s"
|
| 125 |
print(info)
|
| 126 |
return (
|
|
|
|
| 238 |
label="Input Text",
|
| 239 |
value="I'm Never Gonna Give You Up",
|
| 240 |
)
|
| 241 |
+
rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=False)
|
| 242 |
+
effect = gr.Checkbox(label="Add Effect", info="Would you like to apply Effect?", value=True)
|
| 243 |
with gr.Column():
|
| 244 |
but0 = gr.Button("Convert", variant="primary")
|
| 245 |
info_text = gr.Textbox(label="Output info")
|
|
|
|
| 249 |
tts,
|
| 250 |
[
|
| 251 |
rvc,
|
| 252 |
+
effect,
|
| 253 |
speed,
|
| 254 |
pitch,
|
| 255 |
tts_text,
|
requirements.txt
CHANGED
|
@@ -8,4 +8,6 @@ pyworld==0.3.4
|
|
| 8 |
torchcrepe==0.0.21
|
| 9 |
scikit-learn==1.3.0
|
| 10 |
gradio
|
| 11 |
-
gradio_client
|
|
|
|
|
|
|
|
|
| 8 |
torchcrepe==0.0.21
|
| 9 |
scikit-learn==1.3.0
|
| 10 |
gradio
|
| 11 |
+
gradio_client
|
| 12 |
+
pydub
|
| 13 |
+
scipy
|