| import gradio as gr |
| import os |
| import requests |
| import zipfile |
| import asyncio |
| import edge_tts |
| import soundfile as sf |
| from infer_rvc_python import BaseLoader |
|
|
| |
# Root folder under which each imported voice model gets its own sub-folder.
# It is created at import time if it does not exist yet.
MODEL_DIR = "voice_models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Build the RVC converter once at start-up; text_to_custom_speech reuses this
# single module-level instance for every request.
# NOTE(review): only_cpu=True presumably disables GPU inference, and passing
# None for hubert_path / rmvpe_path presumably makes the library fall back to
# its default weights -- confirm against infer_rvc_python's BaseLoader.
print("Initializing RVC Engine...")
rvc_converter = BaseLoader(only_cpu=True, hubert_path=None, rmvpe_path=None)
|
|
| |
|
|
def _is_safe_model_name(model_name):
    """Return True when *model_name* is usable as a single folder name.

    The name is typed by the web user and later joined onto MODEL_DIR, so
    anything that could climb out of that folder ("..", path separators,
    drive prefixes, NUL bytes) is rejected.
    """
    if model_name in {".", ".."} or "\0" in model_name:
        return False
    # Check both separator styles explicitly so the result does not depend on
    # the platform the app happens to run on.
    if "/" in model_name or "\\" in model_name:
        return False
    # basename() additionally strips platform-specific prefixes (e.g. a
    # Windows drive), so any difference means the name is not a plain folder.
    return os.path.basename(model_name) == model_name


def download_and_extract_model(zip_url, model_name):
    """Download a zipped RVC model and unpack it into its own model folder.

    The archive is streamed to ``MODEL_DIR/<model_name>/model.zip``, extracted
    in place, and the zip file is removed afterwards whether or not the import
    succeeded.

    Args:
        zip_url: Direct link to the ``.zip`` archive.
        model_name: Folder name to store the model under; must be a plain
            name without path separators.

    Returns:
        A human-readable status string for the UI. Failures are reported in
        the returned message rather than raised.
    """
    if not zip_url or not model_name:
        return "Error: Please provide both a URL and a Model Name."
    if not _is_safe_model_name(model_name):
        return "Error: Model Name must be a single folder name (no slashes or '..')."

    model_folder = os.path.join(MODEL_DIR, model_name)
    zip_path = os.path.join(model_folder, "model.zip")

    try:
        os.makedirs(model_folder, exist_ok=True)

        # (connect, read) timeouts keep a stalled server from hanging the
        # request forever; the context manager releases the connection.
        with requests.get(zip_url, stream=True, timeout=(10, 120)) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(model_folder)
    except zipfile.BadZipFile:
        return "Error: The downloaded file is not a valid zip archive."
    except Exception as e:
        # UI boundary: surface the failure as a status message.
        return f"Error downloading model: {str(e)}"
    finally:
        # Best-effort cleanup so a failed import does not leave a partial
        # archive behind; the file may not exist if the download never began.
        try:
            os.remove(zip_path)
        except OSError:
            pass

    pth_found = any(
        name.endswith(".pth")
        for _root, _dirs, files in os.walk(model_folder)
        for name in files
    )
    if pth_found:
        return f"Success! Model '{model_name}' downloaded and imported."
    return "Warning: Downloaded successfully, but no .pth file was found in the zip."
|
|
async def generate_base_tts(text, output_path, voice="en-US-ChristopherNeural"):
    """Generate the base audio for *text* with Edge-TTS and save it.

    Args:
        text: The text to synthesize.
        output_path: File path the synthesized audio is written to.
        voice: Edge-TTS voice identifier. Defaults to the neutral male voice
            the app has always used, so existing callers are unaffected.

    NOTE(review): edge-tts is understood to write MP3-encoded audio whatever
    extension *output_path* carries -- confirm that downstream loaders detect
    the format from the content rather than from the file name.
    """
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)
|
|
def _find_model_files(model_folder):
    """Locate the RVC weight (.pth) and optional feature index (.index) files.

    Walks *model_folder* recursively. When several candidates exist, the last
    one visited wins. Returns ``(pth_file, index_file)``; either may be None.
    """
    pth_file = None
    index_file = None
    for root, _dirs, files in os.walk(model_folder):
        for name in files:
            if name.endswith(".pth"):
                pth_file = os.path.join(root, name)
            elif name.endswith(".index"):
                index_file = os.path.join(root, name)
    return pth_file, index_file


def text_to_custom_speech(text, model_name, pitch_adjustment):
    """Synthesize *text* with Edge-TTS, then convert it with an RVC model.

    Args:
        text: The text to speak.
        model_name: Name of a previously imported model (a folder inside
            MODEL_DIR).
        pitch_adjustment: Pitch shift passed to the converter as ``pitch_lvl``.

    Returns:
        ``(audio_path, status_message)``. ``audio_path`` is None on failure;
        failures are reported through the message rather than raised.
    """
    import tempfile

    if not text or not text.strip():
        return None, "Error: Please enter some text."

    # Validate the name BEFORE building a path from it: it comes from the web
    # UI, may be empty/None, and must not be able to escape MODEL_DIR.
    if (
        not model_name
        or model_name in {".", ".."}
        or any(ch in model_name for ch in ("/", "\\", "\0"))
    ):
        return None, "Error: Please import a valid model first."

    model_folder = os.path.join(MODEL_DIR, model_name)
    if not os.path.isdir(model_folder):
        return None, "Error: Please import a valid model first."

    base_audio_path = None
    try:
        pth_file, index_file = _find_model_files(model_folder)
        if not pth_file:
            return None, "Error: No .pth file found for this model."

        # Unique per-request files: fixed names in the working directory would
        # be shared (and overwritten) by every request served by this app.
        fd, base_audio_path = tempfile.mkstemp(prefix="tts_base_", suffix=".wav")
        os.close(fd)
        fd, output_audio_path = tempfile.mkstemp(prefix="rvc_out_", suffix=".wav")
        os.close(fd)

        asyncio.run(generate_base_tts(text, base_audio_path))

        rvc_converter.apply_conf(
            tag=model_name,
            file_model=pth_file,
            pitch_algo="rmvpe",
            pitch_lvl=pitch_adjustment,
            file_index=index_file if index_file else "",
            index_influence=0.66,
            respiration_median_filtering=3,
            envelope_ratio=0.25,
            consonant_breath_protection=0.33,
        )

        result_array, sample_rate = rvc_converter.generate_from_cache(
            audio_data=base_audio_path,
            tag=model_name,
        )

        sf.write(file=output_audio_path, samplerate=sample_rate, data=result_array)

        return output_audio_path, "Speech generated successfully with custom voice!"

    except Exception as e:
        # UI boundary: surface the failure as a status message.
        return None, f"Error generating speech: {str(e)}"
    finally:
        # The intermediate TTS audio is only needed during conversion. The
        # output file is kept because the caller still has to read it.
        if base_audio_path is not None:
            try:
                os.remove(base_audio_path)
            except OSError:
                pass
|
|
| |
|
|
# Build the two-column UI: model import on the left, speech generation on the
# right. Event handlers are registered inside the Blocks context.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎙️ RVC Text-to-Speech Space")
    gr.Markdown("Import a custom voice model via a direct zip link, then generate text-to-speech using that voice.")

    with gr.Row():
        # Left column: download and unpack a voice model.
        with gr.Column():
            gr.Markdown("### 1. Import Voice Model")
            model_url_input = gr.Textbox(label="Model Zip URL (e.g., HuggingFace resolve link)", placeholder="https://huggingface.co/...")
            model_name_input = gr.Textbox(label="Model Name", placeholder="e.g., needlev2")
            import_btn = gr.Button("Done (Import Model)", variant="primary")
            import_status = gr.Textbox(label="Import Status", interactive=False)

        # Right column: text entry, pitch control and the generated result.
        with gr.Column():
            gr.Markdown("### 2. Text to Speech")
            text_input = gr.Textbox(label="Enter Text", lines=4, placeholder="Type what you want the voice to say here...")
            pitch_slider = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch Adjustment (Set to +12 for Female voices, 0 or -12 for Male)")
            generate_btn = gr.Button("Done (Generate Speech)", variant="primary")

            # NOTE(review): the output widgets are assumed to sit in this
            # column -- confirm the intended nesting.
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            generation_status = gr.Textbox(label="Status", interactive=False)

    # The import button reports its result in the import status box.
    import_btn.click(
        fn=download_and_extract_model,
        inputs=[model_url_input, model_name_input],
        outputs=import_status
    )

    # Generation reuses the model name typed in the import column, so the two
    # steps always refer to the same model folder.
    generate_btn.click(
        fn=text_to_custom_speech,
        inputs=[text_input, model_name_input, pitch_slider],
        outputs=[audio_output, generation_status]
    )


# Start the Gradio server when the module is executed.
app.launch()