Amirox committed on
Commit
fe4a50f
·
1 Parent(s): 1950609

Add run_with_urls function for audio conversion via URLs and integrate MCP-friendly interface in Gradio

Browse files
Files changed (1) hide show
  1. app.py +115 -0
app.py CHANGED
@@ -466,6 +466,87 @@ def run(
466
  return result
467
 
468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
  def audio_conf():
470
  return gr.File(
471
  label="Audio files",
@@ -889,6 +970,40 @@ def get_gui(theme):
889
  )
890
  gr.Markdown(RESOURCES)
891
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892
  return app
893
 
894
 
 
466
  return result
467
 
468
 
469
def run_with_urls(
    audio_url: str,
    model_url: str,
    index_url: str = None,
    pitch_alg: str = "rmvpe+",
    pitch_lvl: int = 0,
    index_inf: float = 0.75,
    r_m_f: int = 3,
    e_r: float = 0.25,
    c_b_p: float = 0.5,
    active_noise_reduce: bool = False,
    audio_effects: bool = False,
    type_output: str = "wav",
    steps: int = 1,
):
    """
    Convert audio using RVC voice conversion with URL inputs (MCP-friendly).

    The audio/video file is downloaded into a fresh, request-private folder
    under ``downloads/``; every supported media file found there is handed to
    ``run`` together with the model/index URLs. Cleanup of the download
    folder is scheduled on a background thread whether or not the conversion
    succeeds.

    Args:
        audio_url: URL to audio or video file. Supported: wav, mp3, ogg, flac, m4a, mp4, mkv, webm, avi, mov.
        model_url: URL to the model file (.pth) or zip file containing model.
        index_url: Optional URL to the index file (.index). Leave empty if using zip.
        pitch_alg: Pitch algorithm - one of: pm, harvest, crepe, rmvpe, rmvpe+
        pitch_lvl: Pitch level adjustment (-24 to 24, default: 0)
        index_inf: Index influence (0.0 to 1.0, default: 0.75)
        r_m_f: Respiration median filtering (0 to 7, default: 3)
        e_r: Envelope ratio (0.0 to 1.0, default: 0.25)
        c_b_p: Consonant breath protection (0.0 to 0.5, default: 0.5)
        active_noise_reduce: Apply noise reduction (default: False)
        audio_effects: Apply reverb and compression effects (default: False)
        type_output: Output format - one of: wav, mp3, flac (default: wav)
        steps: Number of conversion steps (1 to 3, default: 1)

    Returns:
        The value returned by ``run`` (documented by the caller-facing
        interface as a list of paths to converted audio files).

    Raises:
        ValueError: If ``audio_url`` is missing/blank, or if the download
            produced no file with a supported audio/video extension.
    """
    # Local import so this block does not depend on the file's import header.
    import uuid

    # Validate before touching the filesystem so a bad request leaves no
    # empty folder behind and fails with a clear message (instead of an
    # AttributeError on None or an opaque downloader error on "").
    audio_url = audio_url.strip() if isinstance(audio_url, str) else ""
    if not audio_url:
        raise ValueError("audio_url is required and must be a non-empty URL.")

    # Treat URLs uniformly: surrounding whitespace is never meaningful.
    if isinstance(model_url, str):
        model_url = model_url.strip()
    # An empty Gradio textbox yields "" — map it to the documented
    # "no index" value (the parameter's default, None).
    if isinstance(index_url, str):
        index_url = index_url.strip() or None

    # Supported audio and video formats (ffmpeg will extract audio from video)
    SUPPORTED_FORMATS = (
        # Audio formats
        '.wav', '.mp3', '.ogg', '.flac', '.m4a', '.aac', '.wma', '.opus',
        # Video formats (audio will be extracted)
        '.mp4', '.mkv', '.webm', '.avi', '.mov', '.wmv', '.flv', '.m4v'
    )

    # A UUID-named folder created *without* exist_ok guarantees that two
    # concurrent requests never share (and later delete) each other's files,
    # which a 5-digit random name with exist_ok=True could not.
    out_dir = "downloads"
    os.makedirs(out_dir, exist_ok=True)
    audio_dir = os.path.join(out_dir, uuid.uuid4().hex)
    os.makedirs(audio_dir)

    try:
        # Download audio/video file
        download_manager(url=audio_url, path=audio_dir, extension="")

        # Sorted so the processing order does not depend on the
        # platform-specific ordering of os.listdir.
        audio_files = sorted(
            os.path.join(audio_dir, name)
            for name in os.listdir(audio_dir)
            if name.lower().endswith(SUPPORTED_FORMATS)
        )

        if not audio_files:
            raise ValueError(f"No audio/video file found after downloading from {audio_url}. Supported formats: {SUPPORTED_FORMATS}")

        # Call the main run function with URLs for model/index
        return run(
            audio_files=audio_files,
            file_m=model_url,
            pitch_alg=pitch_alg,
            pitch_lvl=pitch_lvl,
            file_index=index_url,
            index_inf=index_inf,
            r_m_f=r_m_f,
            e_r=e_r,
            c_b_p=c_b_p,
            active_noise_reduce=active_noise_reduce,
            audio_effects=audio_effects,
            type_output=type_output,
            steps=steps,
        )
    finally:
        # Cleanup audio download folder on a background thread so the
        # response is not delayed by it.
        # NOTE(review): clear_files is defined elsewhere in the file; confirm
        # it tolerates being started right as the result is returned.
        threading.Thread(target=clear_files, args=(audio_dir,)).start()
548
+
549
+
550
  def audio_conf():
551
  return gr.File(
552
  label="Audio files",
 
970
  )
971
  gr.Markdown(RESOURCES)
972
 
973
+ # MCP-friendly interface (collapsed by default in the UI, exposed for API/MCP calls)
974
+ with gr.Accordion("API / MCP Interface", open=False, visible=True):
975
+ gr.Markdown("Use this interface for API calls or MCP integration with URL inputs.")
976
+ with gr.Row():
977
+ mcp_audio_url = gr.Textbox(label="Audio URL", placeholder="https://example.com/audio.wav")
978
+ mcp_model_url = gr.Textbox(label="Model URL (.pth or .zip)", placeholder="https://huggingface.co/.../model.pth")
979
+ mcp_index_url = gr.Textbox(label="Index URL (optional)", placeholder="https://huggingface.co/.../model.index")
980
+ with gr.Row():
981
+ mcp_pitch_alg = gr.Dropdown(PITCH_ALGO_OPT, value="rmvpe+", label="Pitch Algorithm")
982
+ mcp_pitch_lvl = gr.Slider(minimum=-24, maximum=24, value=0, step=1, label="Pitch Level")
983
+ mcp_index_inf = gr.Slider(minimum=0, maximum=1, value=0.75, label="Index Influence")
984
+ with gr.Row():
985
+ mcp_r_m_f = gr.Slider(minimum=0, maximum=7, value=3, step=1, label="Respiration Filter")
986
+ mcp_e_r = gr.Slider(minimum=0, maximum=1, value=0.25, label="Envelope Ratio")
987
+ mcp_c_b_p = gr.Slider(minimum=0, maximum=0.5, value=0.5, label="Consonant Protection")
988
+ with gr.Row():
989
+ mcp_noise_reduce = gr.Checkbox(value=False, label="Noise Reduce")
990
+ mcp_effects = gr.Checkbox(value=False, label="Audio Effects")
991
+ mcp_format = gr.Dropdown(["wav", "mp3", "flac"], value="wav", label="Output Format")
992
+ mcp_steps = gr.Slider(minimum=1, maximum=3, value=1, step=1, label="Steps")
993
+ mcp_button = gr.Button("Convert (URL)", variant="secondary")
994
+ mcp_output = gr.File(label="Result", file_count="multiple")
995
+
996
+ mcp_button.click(
997
+ fn=run_with_urls,
998
+ inputs=[
999
+ mcp_audio_url, mcp_model_url, mcp_index_url,
1000
+ mcp_pitch_alg, mcp_pitch_lvl, mcp_index_inf,
1001
+ mcp_r_m_f, mcp_e_r, mcp_c_b_p,
1002
+ mcp_noise_reduce, mcp_effects, mcp_format, mcp_steps
1003
+ ],
1004
+ outputs=[mcp_output],
1005
+ )
1006
+
1007
  return app
1008
 
1009