# Voice_Colour / app.py — Hugging Face Space (author: kochit)
# Last commit: be732bb "Update app.py" (verified)
import os

# --- FORCE CPU (avoid GPU errors on CPU-only Spaces hardware) ---
# Must be set BEFORE `import torch` so CUDA device discovery finds no GPUs.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import sys
import torch
import gradio as gr
import edge_tts
import asyncio
import shutil
from huggingface_hub import hf_hub_download

# PyTorch CPU Mode — belt-and-braces: any later cuda check reports False.
torch.cuda.is_available = lambda : False

print(f"πŸš€ System Mode: CPU Only (OpenVoice V1)")
# --- 1. Setup OpenVoice V1 ---
# Clone the OpenVoice repo on first run so its python package is importable.
if not os.path.exists("OpenVoice"):
    print("Cloning OpenVoice V1...")
    os.system("git clone https://github.com/myshell-ai/OpenVoice.git")
# Make the cloned repo importable as `openvoice`.
sys.path.append(os.path.abspath("OpenVoice"))
# V1 Checkpoints Folder — created up-front so downloads have a target dir.
os.makedirs("checkpoints/converter", exist_ok=True)
# V1 Model Download (Hugging Face Source)
def download_v1_models():
    """Fetch the OpenVoice V1 converter config + checkpoint from Hugging Face.

    Best-effort: a failed download is only logged here; the later model-load
    step reports the problem to the user.
    """
    config_path = "checkpoints/converter/config.json"
    ckpt_path = "checkpoints/converter/checkpoint.pth"
    # FIX: previously only checkpoint.pth was checked, so a missing
    # config.json next to an existing checkpoint was never re-downloaded.
    if os.path.exists(config_path) and os.path.exists(ckpt_path):
        return
    print("Downloading V1 Model from Hugging Face...")
    try:
        # NOTE: local_dir_use_symlinks is deprecated (ignored by recent
        # huggingface_hub); kept for compatibility with older versions.
        # Config File
        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
        # Model File (V1)
        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
        print("βœ… V1 Model Downloaded!")
    except Exception as e:
        # Deliberate best-effort: the model load below surfaces a clearer error.
        print(f"Download Error: {e}")

download_v1_models()
# Import Modules
# Prefer the packaged layout (`openvoice.*`); fall back to importing the
# modules straight from the repo subfolder for checkouts without the package.
try:
    from openvoice.api import ToneColorConverter
    from openvoice import se_extractor
except ImportError:
    sys.path.append(os.path.abspath("OpenVoice/openvoice"))
    from api import ToneColorConverter
    import se_extractor
# --- 2. Load V1 Model ---
ckpt_converter = 'checkpoints/converter'
if not os.path.exists(f"{ckpt_converter}/config.json"):
    # Fallback logic: use the checkpoints bundled inside the cloned repo.
    ckpt_converter = 'OpenVoice/checkpoints/converter'
print(f"Loading V1 Model...")
try:
    # V1 Model Load (CPU)
    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device='cpu')
    tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
    print("βœ… V1 Model Loaded Successfully!")
except Exception as e:
    # Keep the app alive; predict() checks for None and reports the failure.
    print(f"Model Load Error: {e}")
    tone_color_converter = None
# --- 3. Mastering Engine (Audio Fix) ---
def apply_mastering(input_wav, style="Radio"):
    """Post-process `input_wav` with an ffmpeg filter chain; return output path.

    Returns `input_wav` unchanged when ffmpeg is unavailable, when `style`
    selects no filter chain ("Raw" or the unused default "Radio"), or when
    ffmpeg fails.
    """
    import subprocess  # hoisted out of the try so an ImportError isn't masked

    if not shutil.which("ffmpeg"):
        return input_wav

    # FIX: local was named `filter`, shadowing the builtin.
    if style == "Radio / Studio":
        af_chain = "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=2000:t=q:w=1:g=2, loudnorm"
    elif style == "Natural":
        af_chain = "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, loudnorm"
    else:
        # "Raw" (and any unknown style) skips mastering entirely.
        return input_wav

    output_wav = "outputs/mastered_output.wav"
    try:
        subprocess.run(["ffmpeg", "-y", "-i", input_wav, "-af", af_chain, "-ar", "44100", output_wav], check=True)
        return output_wav
    # FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # narrowed to the errors subprocess.run can actually raise here.
    except (subprocess.CalledProcessError, OSError):
        return input_wav
# --- 4. Main Workflow ---
async def run_edge_tts(text, gender):
    """Synthesize `text` with a Burmese Edge-TTS voice; return the mp3 path."""
    # FIX: the female Burmese voice id is "my-MM-NilarNeural";
    # "my-MM-NularNeural" does not exist, so Female synthesis always failed.
    voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NilarNeural"
    output_file = "temp_base.mp3"
    await edge_tts.Communicate(text, voice).save(output_file)
    return output_file
def predict(text, ref_audio, gender, mastering_style):
    """Full pipeline: Edge-TTS base voice -> OpenVoice tone conversion -> mastering.

    Returns (status_message, output_audio_path_or_None). All failures are
    reported through the status string so the Gradio UI never crashes.
    """
    if tone_color_converter is None:
        return "System Error: Model failed to load.", None
    if not text:
        return "α€…α€¬α€›α€­α€―α€€α€Ία€‘α€Šα€·α€Ία€•α€«", None
    if not ref_audio:
        return "Reference Audio α€‘α€Šα€·α€Ία€•α€«", None
    try:
        # Step A: Edge TTS — synthesize the base voice for the chosen gender.
        base_audio = asyncio.run(run_edge_tts(text, gender))

        # Step B: OpenVoice V1 tone-colour conversion.
        os.makedirs("outputs", exist_ok=True)

        # VAD Handling — try the VAD-based speaker embedding first and fall
        # back to whole-file extraction if VAD fails on the reference clip.
        # FIX: was a bare `except:` (also swallowed KeyboardInterrupt).
        try:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
        except Exception:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=False)
        source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)

        raw_output = "outputs/raw_v1.wav"
        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=raw_output,
            message="@NanoBanana"
        )

        # Step C: Mastering (optional ffmpeg post-processing).
        final_output = apply_mastering(raw_output, mastering_style)
        return "Success (V1)!", final_output
    except Exception as e:
        # Top-level boundary: log the traceback, surface the message in the UI.
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}", None
# UI — Gradio front-end: text + reference audio in, status + cloned audio out.
with gr.Blocks(title="Myanmar OpenVoice V1") as demo:
    gr.Markdown("# πŸ‡²πŸ‡² Myanmar Voice Cloning (OpenVoice V1)")
    gr.Markdown("V1 Model + CPU Stable Mode")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="မြန်မာစာ ရိုက်ပါ", lines=3, placeholder="α€™α€„α€Ία€Ήα€‚α€œα€¬α€•α€«...")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Base Voice")
                style = gr.Dropdown(["Radio / Studio", "Natural", "Raw"], value="Radio / Studio", label="Mastering")
            # type="filepath" hands predict() a path, matching se_extractor.get_se.
            ref = gr.Audio(label="Reference Audio", type="filepath")
            btn = gr.Button("Generate Voice", variant="primary")
        with gr.Column():
            status = gr.Textbox(label="Status")
            out = gr.Audio(label="Result")
    # predict(text, ref_audio, gender, mastering_style) -> (status, audio path)
    btn.click(predict, [input_text, ref, gender, style], [status, out])
demo.launch()