"""Gradio app: import an RVC voice model from a zip URL and speak text with it.

Flow: the user imports a model archive (expected to contain a ``.pth`` file
and optionally a ``.index`` file), then enters text. The text is synthesized
with Edge-TTS and the resulting audio is converted with the RVC model.
"""

import asyncio
import contextlib
import os
import tempfile
import zipfile

import edge_tts
import gradio as gr
import requests
import soundfile as sf
from infer_rvc_python import BaseLoader

# Directory to store downloaded models
MODEL_DIR = "voice_models"
os.makedirs(MODEL_DIR, exist_ok=True)

# (connect, read) timeouts in seconds, so a stalled server cannot block a
# worker forever.
DOWNLOAD_TIMEOUT = (10, 60)

# Edge-TTS voice used for the base audio (Neutral Male Voice).
BASE_TTS_VOICE = "en-US-ChristopherNeural"

# Initialize the RVC Loader (CPU mode for the Hugging Face Free Tier)
# This will automatically download required background models on its first run
print("Initializing RVC Engine...")
rvc_converter = BaseLoader(only_cpu=True, hubert_path=None, rmvpe_path=None)


# --- Helper Functions ---

def _normalize_model_name(model_name):
    """Return *model_name* stripped and validated, or ``None`` if unusable.

    The name becomes a folder directly under ``MODEL_DIR``, so anything that
    could point elsewhere (path separators, ``.``/``..``, absolute paths,
    NUL bytes) is rejected.
    """
    if not isinstance(model_name, str):
        return None
    name = model_name.strip()
    if not name or any(ch in name for ch in ("/", "\\", "\0")):
        return None
    # Final guard: the resolved folder must be a direct child of MODEL_DIR.
    # This also rejects "." and "..".
    models_root = os.path.abspath(MODEL_DIR)
    candidate = os.path.abspath(os.path.join(models_root, name))
    if os.path.dirname(candidate) != models_root:
        return None
    return name


def _find_model_files(model_folder):
    """Return ``(pth_path, index_path)`` found under *model_folder*.

    Either element is ``None`` when no file with that extension exists. The
    tree is walked in sorted order so the choice is deterministic; when
    several files match, the last one in that order is used.
    """
    pth_file = None
    index_file = None
    for root, dirs, files in os.walk(model_folder):
        dirs.sort()  # in-place sort steers os.walk's traversal order
        for file_name in sorted(files):
            if file_name.endswith(".pth"):
                pth_file = os.path.join(root, file_name)
            elif file_name.endswith(".index"):
                index_file = os.path.join(root, file_name)
    return pth_file, index_file


def _new_temp_path(suffix):
    """Create an empty, uniquely named temp file and return its path."""
    fd, path = tempfile.mkstemp(prefix="rvc_tts_", suffix=suffix)
    os.close(fd)
    return path


def download_and_extract_model(zip_url, model_name):
    """Downloads the zip link and extracts the RVC files.

    Args:
        zip_url: Direct URL of a zip archive containing the model files.
        model_name: Name of the folder (under ``MODEL_DIR``) to extract into.

    Returns:
        A human-readable status string for the UI. Failures are reported in
        the string rather than raised.
    """
    if not zip_url or not model_name:
        return "Error: Please provide both a URL and a Model Name."

    safe_name = _normalize_model_name(model_name)
    if safe_name is None:
        return (
            "Error: Model Name must be a plain folder name "
            "(no path separators, '.' or '..')."
        )

    model_folder = os.path.join(MODEL_DIR, safe_name)
    zip_path = os.path.join(model_folder, "model.zip")

    try:
        os.makedirs(model_folder, exist_ok=True)

        # Stream to disk so large archives are never held fully in memory;
        # the context manager releases the connection when done.
        with requests.get(
            zip_url.strip(), stream=True, timeout=DOWNLOAD_TIMEOUT
        ) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(model_folder)
    except Exception as e:
        # UI boundary: requests, zipfile and the filesystem raise unrelated
        # exception types; all of them are reported to the user as text.
        return f"Error downloading model: {str(e)}"
    finally:
        # Best-effort cleanup so a failed import does not leave a partial
        # archive behind.
        with contextlib.suppress(OSError):
            os.remove(zip_path)

    # Verify if .pth exists
    pth_file, _ = _find_model_files(model_folder)
    if pth_file:
        return f"Success! Model '{safe_name}' downloaded and imported."
    return "Warning: Downloaded successfully, but no .pth file was found in the zip."


async def generate_base_tts(text, output_path):
    """Generates the base audio using Edge-TTS (Neutral Male Voice).

    Args:
        text: Text to synthesize.
        output_path: File path the synthesized audio is saved to.
    """
    communicate = edge_tts.Communicate(text, BASE_TTS_VOICE)
    await communicate.save(output_path)


def text_to_custom_speech(text, model_name, pitch_adjustment):
    """Generates text, then applies the RVC model to change the voice.

    Args:
        text: Text to speak.
        model_name: Name of a previously imported model folder.
        pitch_adjustment: Pitch level passed to the RVC converter.

    Returns:
        ``(audio_path, status)``. ``audio_path`` is ``None`` on failure and
        ``status`` then describes the error.
    """
    if not text or not text.strip():
        return None, "Error: Please enter some text."

    safe_name = _normalize_model_name(model_name)
    if safe_name is None:
        return None, "Error: Please import a valid model first."

    model_folder = os.path.join(MODEL_DIR, safe_name)
    if not os.path.isdir(model_folder):
        return None, "Error: Please import a valid model first."

    base_audio_path = None
    try:
        # 1. Find the .pth and .index files for the requested model
        pth_file, index_file = _find_model_files(model_folder)
        if not pth_file:
            return None, "Error: No .pth file found for this model."

        # 2. Generate Base TTS
        # Unique temp files keep simultaneous requests from overwriting each
        # other's audio. Edge-TTS saves MP3 data, hence the suffix.
        base_audio_path = _new_temp_path(".mp3")
        asyncio.run(generate_base_tts(text, base_audio_path))

        # 3. Apply RVC Voice Conversion
        rvc_converter.apply_conf(
            tag=safe_name,
            file_model=pth_file,
            pitch_algo="rmvpe",
            pitch_lvl=pitch_adjustment,
            file_index=index_file if index_file else "",
            index_influence=0.66,
            respiration_median_filtering=3,
            envelope_ratio=0.25,
            consonant_breath_protection=0.33,
        )

        result_array, sample_rate = rvc_converter.generate_from_cache(
            audio_data=base_audio_path,
            tag=safe_name,
        )

        # Save the final converted audio. This file is intentionally left on
        # disk: its path is handed to the Gradio audio component.
        output_audio_path = _new_temp_path(".wav")
        sf.write(file=output_audio_path, samplerate=sample_rate, data=result_array)

        return output_audio_path, "Speech generated successfully with custom voice!"

    except Exception as e:
        # UI boundary: surface any TTS / conversion failure as a status message.
        return None, f"Error generating speech: {str(e)}"
    finally:
        # The intermediate TTS audio is no longer needed once conversion ends.
        if base_audio_path is not None:
            with contextlib.suppress(OSError):
                os.remove(base_audio_path)


# --- Gradio User Interface ---

with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎙️ RVC Text-to-Speech Space")
    gr.Markdown("Import a custom voice model via a direct zip link, then generate text-to-speech using that voice.")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 1. Import Voice Model")
            model_url_input = gr.Textbox(label="Model Zip URL (e.g., HuggingFace resolve link)", placeholder="https://huggingface.co/...")
            model_name_input = gr.Textbox(label="Model Name", placeholder="e.g., needlev2")
            import_btn = gr.Button("Done (Import Model)", variant="primary")
            import_status = gr.Textbox(label="Import Status", interactive=False)

        with gr.Column():
            gr.Markdown("### 2. Text to Speech")
            text_input = gr.Textbox(label="Enter Text", lines=4, placeholder="Type what you want the voice to say here...")
            pitch_slider = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch Adjustment (Set to +12 for Female voices, 0 or -12 for Male)")
            generate_btn = gr.Button("Done (Generate Speech)", variant="primary")
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            generation_status = gr.Textbox(label="Status", interactive=False)

    # Wire up the buttons
    import_btn.click(
        fn=download_and_extract_model,
        inputs=[model_url_input, model_name_input],
        outputs=import_status,
    )

    generate_btn.click(
        fn=text_to_custom_speech,
        inputs=[text_input, model_name_input, pitch_slider],
        outputs=[audio_output, generation_status],
    )

# Only start the server when run as a script, so importing this module
# does not launch a web server.
if __name__ == "__main__":
    app.launch()