# AITextGen / app.py
# Source: Hugging Face Space file view.
# Last commit: "Update app.py" (f9bb750, verified) by sugakrit6.
import asyncio
import os
import tempfile
import zipfile

import edge_tts
import gradio as gr
import requests
import soundfile as sf
from infer_rvc_python import BaseLoader
# Directory where downloaded voice models are stored, one sub-folder per model
# name. The path is relative to the process's working directory.
MODEL_DIR = "voice_models"
# Created at import time; exist_ok keeps restarts from failing when it exists.
os.makedirs(MODEL_DIR, exist_ok=True)
# Initialize the single, module-wide RVC loader in CPU-only mode (the original
# author targets the Hugging Face free tier).
# NOTE(review): hubert_path=None / rmvpe_path=None presumably make the library
# fetch its default background models on first run, as the original comment
# states -- confirm against the infer_rvc_python documentation.
print("Initializing RVC Engine...")
rvc_converter = BaseLoader(only_cpu=True, hubert_path=None, rmvpe_path=None)
# --- Helper Functions ---
def download_and_extract_model(zip_url, model_name):
    """Download a zipped RVC voice model and unpack it under ``MODEL_DIR``.

    The archive is streamed to ``<MODEL_DIR>/<model_name>/model.zip``,
    extracted into that same folder, and the temporary zip is removed again
    whether or not the import succeeded.

    Args:
        zip_url: Direct link to a ``.zip`` archive containing the model files.
        model_name: Folder name to store the model under. It must be a plain
            name: path separators, ``.`` and ``..`` are rejected so that the
            files cannot land outside ``MODEL_DIR``.

    Returns:
        A human-readable status string for the UI (success, warning when no
        ``.pth`` file was found, or an error description). Failures are
        reported through the string rather than raised.
    """
    if not zip_url or not model_name:
        return "Error: Please provide both a URL and a Model Name."

    # model_name comes straight from a UI textbox and is joined onto
    # MODEL_DIR, so anything that could climb out of it must be refused.
    if (
        model_name in {".", ".."}
        or "/" in model_name
        or "\\" in model_name
        or os.path.basename(model_name) != model_name
    ):
        return "Error: Model Name must be a plain folder name without path separators."

    model_folder = os.path.join(MODEL_DIR, model_name)
    zip_path = os.path.join(model_folder, "model.zip")

    try:
        os.makedirs(model_folder, exist_ok=True)

        # The context manager releases the streamed connection; the timeout
        # keeps a stalled server from hanging the request forever.
        with requests.get(zip_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(model_folder)
    except zipfile.BadZipFile:
        return "Error: The downloaded file is not a valid zip archive."
    except Exception as e:
        # UI boundary: report every failure as a status message.
        return f"Error downloading model: {str(e)}"
    finally:
        # Best-effort cleanup so a failed import does not leave model.zip behind.
        try:
            if os.path.exists(zip_path):
                os.remove(zip_path)
        except OSError:
            pass

    # Verify that the archive actually contained a model checkpoint.
    pth_found = any(
        name.endswith(".pth")
        for _root, _dirs, files in os.walk(model_folder)
        for name in files
    )
    if pth_found:
        return f"Success! Model '{model_name}' downloaded and imported."
    return "Warning: Downloaded successfully, but no .pth file was found in the zip."
async def generate_base_tts(text, output_path, voice="en-US-ChristopherNeural"):
    """Synthesize *text* with Edge-TTS and save the audio to *output_path*.

    Args:
        text: The text to speak.
        output_path: File path the synthesized audio is written to.
        voice: Edge-TTS voice name. Defaults to ``"en-US-ChristopherNeural"``,
            which the original author describes as a neutral male voice.

    NOTE(review): Edge-TTS is believed to emit MP3-encoded audio regardless of
    the file extension -- confirm that whatever reads *output_path* detects
    the format from the content rather than from a ``.wav`` suffix.
    """
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)
def _find_model_files(model_folder):
    """Return ``(pth_path, index_path)`` located anywhere under *model_folder*.

    Either element is ``None`` when no matching file exists. When several
    files match, the last one visited by ``os.walk`` wins.
    """
    pth_file = None
    index_file = None
    for root, _dirs, files in os.walk(model_folder):
        for file_name in files:
            if file_name.endswith(".pth"):
                pth_file = os.path.join(root, file_name)
            elif file_name.endswith(".index"):
                index_file = os.path.join(root, file_name)
    return pth_file, index_file


def text_to_custom_speech(text, model_name, pitch_adjustment):
    """Speak *text* with Edge-TTS, then convert it with an imported RVC model.

    Args:
        text: The text to synthesize; empty or whitespace-only text is rejected.
        model_name: Name of a model folder under ``MODEL_DIR``. Must be a
            plain folder name (no path separators, ``.`` or ``..``).
        pitch_adjustment: Pitch level passed to the RVC converter as
            ``pitch_lvl``.

    Returns:
        ``(audio_path, status)``. ``audio_path`` is the path of the converted
        WAV file on success and ``None`` on failure; ``status`` is a
        human-readable message. Failures are reported through the tuple
        rather than raised.
    """
    if not text or not text.strip():
        return None, "Error: Please enter some text."

    # Validate before touching the filesystem: the name is user input and is
    # joined onto MODEL_DIR, so it must not be able to point anywhere else.
    if (
        not model_name
        or model_name in {".", ".."}
        or "/" in model_name
        or "\\" in model_name
        or os.path.basename(model_name) != model_name
    ):
        return None, "Error: Please import a valid model first."

    model_folder = os.path.join(MODEL_DIR, model_name)
    if not os.path.isdir(model_folder):
        return None, "Error: Please import a valid model first."

    base_audio_path = None
    output_audio_path = None
    succeeded = False
    try:
        # 1. Find the .pth and .index files for the requested model
        pth_file, index_file = _find_model_files(model_folder)
        if not pth_file:
            return None, "Error: No .pth file found for this model."

        # Unique temp files per call, so requests never overwrite each
        # other's audio and nothing accumulates in the working directory.
        fd, base_audio_path = tempfile.mkstemp(prefix="tts_base_", suffix=".wav")
        os.close(fd)
        fd, output_audio_path = tempfile.mkstemp(prefix="tts_rvc_", suffix=".wav")
        os.close(fd)

        # 2. Generate Base TTS
        asyncio.run(generate_base_tts(text, base_audio_path))

        # 3. Apply RVC Voice Conversion
        rvc_converter.apply_conf(
            tag=model_name,
            file_model=pth_file,
            pitch_algo="rmvpe",
            pitch_lvl=pitch_adjustment,
            file_index=index_file if index_file else "",
            index_influence=0.66,
            respiration_median_filtering=3,
            envelope_ratio=0.25,
            consonant_breath_protection=0.33,
        )
        result_array, sample_rate = rvc_converter.generate_from_cache(
            audio_data=base_audio_path,
            tag=model_name,
        )

        # Save the final converted audio
        sf.write(file=output_audio_path, samplerate=sample_rate, data=result_array)
        succeeded = True
        return output_audio_path, "Speech generated successfully with custom voice!"
    except Exception as e:
        # UI boundary: report every failure as a status message.
        return None, f"Error generating speech: {str(e)}"
    finally:
        # The intermediate TTS file is always disposable; the output file is
        # only kept when it is actually being returned to the caller.
        leftovers = [base_audio_path]
        if not succeeded:
            leftovers.append(output_audio_path)
        for path in leftovers:
            try:
                if path and os.path.exists(path):
                    os.remove(path)
            except OSError:
                pass
# --- Gradio User Interface ---
# Build the page: a title, a short description, then two side-by-side columns
# (model import on the left, speech generation on the right).
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎙️ RVC Text-to-Speech Space")
    gr.Markdown("Import a custom voice model via a direct zip link, then generate text-to-speech using that voice.")
    with gr.Row():
        # Left column: inputs and status for downloading a voice model.
        with gr.Column():
            gr.Markdown("### 1. Import Voice Model")
            model_url_input = gr.Textbox(label="Model Zip URL (e.g., HuggingFace resolve link)", placeholder="https://huggingface.co/...")
            model_name_input = gr.Textbox(label="Model Name", placeholder="e.g., needlev2")
            import_btn = gr.Button("Done (Import Model)", variant="primary")
            import_status = gr.Textbox(label="Import Status", interactive=False)
        # Right column: text, pitch control, and the generated audio/status.
        with gr.Column():
            gr.Markdown("### 2. Text to Speech")
            text_input = gr.Textbox(label="Enter Text", lines=4, placeholder="Type what you want the voice to say here...")
            pitch_slider = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch Adjustment (Set to +12 for Female voices, 0 or -12 for Male)")
            generate_btn = gr.Button("Done (Generate Speech)", variant="primary")
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            generation_status = gr.Textbox(label="Status", interactive=False)
    # Wire up the buttons
    # Import: (URL, model name) -> status text.
    import_btn.click(
        fn=download_and_extract_model,
        inputs=[model_url_input, model_name_input],
        outputs=import_status
    )
    # Generate: reuses the "Model Name" textbox from the import column to pick
    # the model; returns (audio file path, status text).
    generate_btn.click(
        fn=text_to_custom_speech,
        inputs=[text_input, model_name_input, pitch_slider],
        outputs=[audio_output, generation_status]
    )
# Start the Gradio server; this runs unconditionally when the module executes.
app.launch()