Add run_with_urls function for audio conversion via URLs and integrate MCP-friendly interface in Gradio
Browse files
app.py
CHANGED
|
@@ -466,6 +466,87 @@ def run(
|
|
| 466 |
return result
|
| 467 |
|
| 468 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
def audio_conf():
|
| 470 |
return gr.File(
|
| 471 |
label="Audio files",
|
|
@@ -889,6 +970,40 @@ def get_gui(theme):
|
|
| 889 |
)
|
| 890 |
gr.Markdown(RESOURCES)
|
| 891 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 892 |
return app
|
| 893 |
|
| 894 |
|
|
|
|
| 466 |
return result
|
| 467 |
|
| 468 |
|
| 469 |
+
def run_with_urls(
    audio_url: str,
    model_url: str,
    index_url: str = None,
    pitch_alg: str = "rmvpe+",
    pitch_lvl: int = 0,
    index_inf: float = 0.75,
    r_m_f: int = 3,
    e_r: float = 0.25,
    c_b_p: float = 0.5,
    active_noise_reduce: bool = False,
    audio_effects: bool = False,
    type_output: str = "wav",
    steps: int = 1,
):
    """
    Convert audio using RVC voice conversion with URL inputs (MCP-friendly).

    The audio/video file is downloaded into a folder created for this call
    only (under "downloads"), every downloaded file with a supported
    extension is passed to run(), and the folder is handed to clear_files()
    on a background thread once run() has returned or raised.
    A ValueError is raised when audio_url is blank, or when the download
    produced no file with a supported extension.

    Args:
        audio_url: URL to audio or video file. Supported: wav, mp3, ogg, flac, m4a, aac, wma, opus, mp4, mkv, webm, avi, mov, wmv, flv, m4v.
        model_url: URL to the model file (.pth) or zip file containing model.
        index_url: Optional URL to the index file (.index). Leave empty if using zip.
        pitch_alg: Pitch algorithm - one of: pm, harvest, crepe, rmvpe, rmvpe+
        pitch_lvl: Pitch level adjustment (-24 to 24, default: 0)
        index_inf: Index influence (0.0 to 1.0, default: 0.75)
        r_m_f: Respiration median filtering (0 to 7, default: 3)
        e_r: Envelope ratio (0.0 to 1.0, default: 0.25)
        c_b_p: Consonant breath protection (0.0 to 0.5, default: 0.5)
        active_noise_reduce: Apply noise reduction (default: False)
        audio_effects: Apply reverb and compression effects (default: False)
        type_output: Output format - one of: wav, mp3, flac (default: wav)
        steps: Number of conversion steps (1 to 3, default: 1)

    Returns:
        List of paths to converted audio files.
    """
    # Imported locally so this block does not depend on the file's import list.
    import tempfile

    # Reject a missing/blank URL before touching the filesystem; a Gradio
    # textbox left empty arrives here as "".
    if not audio_url or not audio_url.strip():
        raise ValueError("audio_url must be a non-empty URL.")

    # Supported audio and video formats (ffmpeg will extract audio from video)
    SUPPORTED_FORMATS = (
        # Audio formats
        '.wav', '.mp3', '.ogg', '.flac', '.m4a', '.aac', '.wma', '.opus',
        # Video formats (audio will be extracted)
        '.mp4', '.mkv', '.webm', '.avi', '.mov', '.wmv', '.flv', '.m4v'
    )

    out_dir = "downloads"
    os.makedirs(out_dir, exist_ok=True)
    # mkdtemp always creates a brand-new directory, so two concurrent calls
    # can never share a download folder (a random 5-digit name could collide,
    # mixing inputs and letting one call's cleanup delete the other's files).
    audio_dir = tempfile.mkdtemp(dir=out_dir)

    try:
        # Download audio/video file
        download_manager(url=audio_url.strip(), path=audio_dir, extension="")

        # Sorted so the processing order does not depend on os.listdir order.
        audio_files = sorted(
            os.path.join(audio_dir, name)
            for name in os.listdir(audio_dir)
            if name.lower().endswith(SUPPORTED_FORMATS)
        )

        if not audio_files:
            raise ValueError(f"No audio/video file found after downloading from {audio_url}. Supported formats: {SUPPORTED_FORMATS}")

        # Call the main run function with URLs for model/index
        return run(
            audio_files=audio_files,
            file_m=model_url,
            pitch_alg=pitch_alg,
            pitch_lvl=pitch_lvl,
            file_index=index_url,
            index_inf=index_inf,
            r_m_f=r_m_f,
            e_r=e_r,
            c_b_p=c_b_p,
            active_noise_reduce=active_noise_reduce,
            audio_effects=audio_effects,
            type_output=type_output,
            steps=steps,
        )
    finally:
        # Cleanup audio download folder; done on a separate thread so the
        # caller is not kept waiting on clear_files().
        t = threading.Thread(target=clear_files, args=(audio_dir,))
        t.start()
|
| 548 |
+
|
| 549 |
+
|
| 550 |
def audio_conf():
|
| 551 |
return gr.File(
|
| 552 |
label="Audio files",
|
|
|
|
| 970 |
)
|
| 971 |
gr.Markdown(RESOURCES)
|
| 972 |
|
| 973 |
+
# MCP-friendly interface (collapsed by default in the UI, exposed for API/MCP calls)
|
| 974 |
+
with gr.Accordion("API / MCP Interface", open=False, visible=True):
|
| 975 |
+
gr.Markdown("Use this interface for API calls or MCP integration with URL inputs.")
|
| 976 |
+
with gr.Row():
|
| 977 |
+
mcp_audio_url = gr.Textbox(label="Audio URL", placeholder="https://example.com/audio.wav")
|
| 978 |
+
mcp_model_url = gr.Textbox(label="Model URL (.pth or .zip)", placeholder="https://huggingface.co/.../model.pth")
|
| 979 |
+
mcp_index_url = gr.Textbox(label="Index URL (optional)", placeholder="https://huggingface.co/.../model.index")
|
| 980 |
+
with gr.Row():
|
| 981 |
+
mcp_pitch_alg = gr.Dropdown(PITCH_ALGO_OPT, value="rmvpe+", label="Pitch Algorithm")
|
| 982 |
+
mcp_pitch_lvl = gr.Slider(minimum=-24, maximum=24, value=0, step=1, label="Pitch Level")
|
| 983 |
+
mcp_index_inf = gr.Slider(minimum=0, maximum=1, value=0.75, label="Index Influence")
|
| 984 |
+
with gr.Row():
|
| 985 |
+
mcp_r_m_f = gr.Slider(minimum=0, maximum=7, value=3, step=1, label="Respiration Filter")
|
| 986 |
+
mcp_e_r = gr.Slider(minimum=0, maximum=1, value=0.25, label="Envelope Ratio")
|
| 987 |
+
mcp_c_b_p = gr.Slider(minimum=0, maximum=0.5, value=0.5, label="Consonant Protection")
|
| 988 |
+
with gr.Row():
|
| 989 |
+
mcp_noise_reduce = gr.Checkbox(value=False, label="Noise Reduce")
|
| 990 |
+
mcp_effects = gr.Checkbox(value=False, label="Audio Effects")
|
| 991 |
+
mcp_format = gr.Dropdown(["wav", "mp3", "flac"], value="wav", label="Output Format")
|
| 992 |
+
mcp_steps = gr.Slider(minimum=1, maximum=3, value=1, step=1, label="Steps")
|
| 993 |
+
mcp_button = gr.Button("Convert (URL)", variant="secondary")
|
| 994 |
+
mcp_output = gr.File(label="Result", file_count="multiple")
|
| 995 |
+
|
| 996 |
+
mcp_button.click(
|
| 997 |
+
fn=run_with_urls,
|
| 998 |
+
inputs=[
|
| 999 |
+
mcp_audio_url, mcp_model_url, mcp_index_url,
|
| 1000 |
+
mcp_pitch_alg, mcp_pitch_lvl, mcp_index_inf,
|
| 1001 |
+
mcp_r_m_f, mcp_e_r, mcp_c_b_p,
|
| 1002 |
+
mcp_noise_reduce, mcp_effects, mcp_format, mcp_steps
|
| 1003 |
+
],
|
| 1004 |
+
outputs=[mcp_output],
|
| 1005 |
+
)
|
| 1006 |
+
|
| 1007 |
return app
|
| 1008 |
|
| 1009 |
|