Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| import gradio as gr | |
try:
    import spaces
except ImportError:  # 'spaces' is only provided by Hugging Face Spaces
    import types as _types

    def _gpu(*args, **kwargs):
        """No-op replacement for ``spaces.GPU`` when the package is missing.

        Works both as a bare decorator (``@spaces.GPU``) and as a decorator
        factory (``@spaces.GPU(...)``); in either form the decorated function
        is handed back unchanged.
        """
        # Bare usage: the single positional argument is the function itself.
        applied_directly = not kwargs and len(args) == 1 and callable(args[0])
        if applied_directly:
            return args[0]

        # Factory usage: ignore the options and return an identity decorator.
        return lambda func: func

    # Expose the shim under the attribute name the rest of the file expects.
    spaces = _types.SimpleNamespace(GPU=_gpu)
| from pyharp import * | |
| import os | |
| import torch | |
| import tempfile | |
| from huggingface_hub import hf_hub_download | |
| # Assuming 'inference.py' and its dependencies are available in the environment. | |
| # This typically means the SoulX-Singer repository is cloned or its modules are in sys.path. | |
| from inference import inference_one_song, load_model_and_config | |
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Hugging Face Hub repository that hosts the model files.
MODEL_REPO_ID = "Soul-AILab/SoulX-Singer"

# Fetch the checkpoint first, then its configuration file.
model_pt_path, config_yaml_path = (
    hf_hub_download(repo_id=MODEL_REPO_ID, filename=artifact)
    for artifact in ("model.pt", "config.yaml")
)

# Load the model and config a single time, when this module is executed,
# so every request reuses the same objects.
MODEL, CONFIG = load_model_and_config(model_pt_path, config_yaml_path, DEVICE)
# Metadata describing this model, handed to build_endpoint further down.
model_card = ModelCard(
    name="SoulX-Singer",
    description=(
        "SoulX-Singer is a high-fidelity, zero-shot singing voice synthesis "
        "model that enables users to generate realistic singing voices for "
        "unseen singers. It supports melody-conditioned (F0 contour) and "
        "score-conditioned (MIDI notes) control for precise pitch, rhythm, "
        "and expression."
    ),
    author="Soul-AILab",
    tags=[
        "huggingface_hub",
        "text-to-audio",
        "music",
        "singing-voice-synthesis",
        "svs",
        "zero-shot",
        "text-to-speech",
        "en",
        "zh",
        "arxiv:2602.07803",
        "license:apache-2.0",
        "region:us",
    ],
)
def process_fn(reference_audio, midi_file, lyrics, language, transpose):
    """Run SoulX-Singer inference and return the path of the generated audio.

    The arguments arrive in the same order as the input components declared
    in this file and are forwarded to ``inference_one_song`` together with
    the module-level ``MODEL``, ``CONFIG`` and ``DEVICE``.

    Args:
        reference_audio: Forwarded as ``input_path`` (the reference audio).
        midi_file: Forwarded as ``midi_path``.
        lyrics: Forwarded as ``text_path``.
        language: Forwarded as ``lang``.
        transpose: Forwarded as ``trans``.

    Returns:
        Path of a ``.wav`` file in the system temp directory that was passed
        to ``inference_one_song`` as ``output_path``.
    """
    # Reserve a unique ".wav" path. The handle is closed as soon as the name
    # is known instead of being left for the garbage collector, so no open
    # file descriptor lingers; delete=False keeps the path on disk.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        output_audio_path = tmp_wav.name

    # NOTE(review): `lyrics` comes from a Textbox (raw text) but is passed
    # under the name `text_path` -- confirm whether inference_one_song
    # expects a file path here.
    inference_one_song(
        model=MODEL,
        config=CONFIG,
        input_path=reference_audio,
        output_path=output_audio_path,
        midi_path=midi_file,
        text_path=lyrics,
        lang=language,
        device=DEVICE,
        trans=transpose,
    )
    return output_audio_path
with gr.Blocks() as demo:
    # Input widgets, created in the same order as process_fn's parameters.
    reference_audio_input = (
        gr.Audio(type="filepath", label="Reference Audio (for timbre cloning)")
        .harp_required(True)
        .set_info("Upload an audio file to clone its singing timbre.")
    )
    midi_file_input = (
        gr.File(type="filepath", label="MIDI File", file_types=[".mid", ".midi"])
        .harp_required(True)
        .set_info("Upload a MIDI file to define the melody and rhythm.")
    )
    lyrics_input = gr.Textbox(
        label="Lyrics",
        info="Enter the lyrics to be sung. Ensure they match the MIDI notes.",
    ).harp_required(True)
    language_input = gr.Dropdown(
        choices=["en", "zh"],
        value="en",
        label="Language",
        info="Select the language of the lyrics.",
    )
    transpose_input = gr.Slider(
        minimum=-12,
        maximum=12,
        step=1,
        value=0,
        label="Transpose (semitones)",
        info="Transpose the generated singing voice by this many semitones.",
    )
    input_components = [
        reference_audio_input,
        midi_file_input,
        lyrics_input,
        language_input,
        transpose_input,
    ]

    # Single output widget that receives the path returned by process_fn.
    generated_audio_output = gr.Audio(
        type="filepath",
        label="Generated Singing Voice",
    )
    output_components = [generated_audio_output]

    # Register the model card, widgets and processing function with pyharp.
    build_endpoint(
        model_card=model_card,
        input_components=input_components,
        output_components=output_components,
        process_fn=process_fn,
    )

# Enable request queuing, then start the app.
demo.queue().launch(
    share=True,
    show_error=False,
    pwa=True,
)