import os
# --- FORCE CPU (avoid GPU errors on Spaces) ---
# Must be set before torch initializes CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import sys
import torch
import gradio as gr
import edge_tts
import asyncio
import shutil
from huggingface_hub import hf_hub_download

# Belt-and-braces: make torch report no CUDA even if a device slips through.
torch.cuda.is_available = lambda: False
print("System Mode: CPU Only (OpenVoice V1)")
# --- 1. Setup OpenVoice V1 ---
# Clone the repo on first run so its modules can be imported below.
if not os.path.exists("OpenVoice"):
    print("Cloning OpenVoice V1...")
    os.system("git clone https://github.com/myshell-ai/OpenVoice.git")
sys.path.append(os.path.abspath("OpenVoice"))

# Local folder that will hold the V1 converter checkpoints.
os.makedirs("checkpoints/converter", exist_ok=True)
def download_v1_models():
    """Download the OpenVoice V1 converter config and checkpoint from Hugging Face.

    Skips the download when the checkpoint already exists locally. Errors are
    logged rather than raised so the app can still start and surface the
    failure via the model-load step.
    """
    # Guard clause: nothing to do if the checkpoint is already on disk.
    if os.path.exists("checkpoints/converter/checkpoint.pth"):
        return
    print("Downloading V1 Model from Hugging Face...")
    try:
        # Config file
        hf_hub_download(
            repo_id="myshell-ai/OpenVoice",
            filename="checkpoints/converter/config.json",
            local_dir=".",
            local_dir_use_symlinks=False,
        )
        # Model weights (V1)
        hf_hub_download(
            repo_id="myshell-ai/OpenVoice",
            filename="checkpoints/converter/checkpoint.pth",
            local_dir=".",
            local_dir_use_symlinks=False,
        )
        # Original success message carried a mojibake check-mark emoji split
        # across two lines; replaced with plain text.
        print("V1 Model Downloaded!")
    except Exception as e:
        print(f"Download Error: {e}")
download_v1_models()

# Import OpenVoice modules; fall back to importing from inside the package
# directory when the top-level package layout is not importable.
try:
    from openvoice.api import ToneColorConverter
    from openvoice import se_extractor
except ImportError:
    sys.path.append(os.path.abspath("OpenVoice/openvoice"))
    from api import ToneColorConverter
    import se_extractor
# --- 2. Load V1 Model ---
ckpt_converter = 'checkpoints/converter'
if not os.path.exists(f"{ckpt_converter}/config.json"):
    # Fallback: checkpoints shipped inside the cloned repo itself.
    ckpt_converter = 'OpenVoice/checkpoints/converter'
print("Loading V1 Model...")
try:
    # V1 model load, pinned to CPU.
    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device='cpu')
    tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
    # Original success message carried a mojibake check-mark emoji split
    # across two lines; replaced with plain text.
    print("V1 Model Loaded Successfully!")
except Exception as e:
    # Keep the app alive; predict() reports the failure to the user.
    print(f"Model Load Error: {e}")
    tone_color_converter = None
# --- 3. Mastering Engine (Audio Fix) ---
def apply_mastering(input_wav, style="Radio"):
if not shutil.which("ffmpeg"): return input_wav
output_wav = "outputs/mastered_output.wav"
if style == "Radio / Studio":
filter = "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=2000:t=q:w=1:g=2, loudnorm"
elif style == "Natural":
filter = "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, loudnorm"
else: return input_wav
try:
import subprocess
subprocess.run(["ffmpeg", "-y", "-i", input_wav, "-af", filter, "-ar", "44100", output_wav], check=True)
return output_wav
except: return input_wav
# --- 4. Main Workflow ---
async def run_edge_tts(text, gender):
voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NularNeural"
output_file = "temp_base.mp3"
await edge_tts.Communicate(text, voice).save(output_file)
return output_file
def predict(text, ref_audio, gender, mastering_style):
    """Full pipeline: Edge-TTS base speech -> OpenVoice V1 tone conversion -> mastering.

    Args:
        text: Text to synthesize.
        ref_audio: Path to the reference recording whose voice is cloned.
        gender: "Male" or "Female" base Edge-TTS voice.
        mastering_style: Passed through to apply_mastering().

    Returns:
        Tuple of (status_message, output_audio_path_or_None).
    """
    if tone_color_converter is None:
        return "System Error: Model failed to load.", None
    # NOTE(review): the original validation messages were Burmese text that
    # arrived mojibake-garbled (split mid-character); rewritten in English.
    if not text:
        return "Please enter some text.", None
    if not ref_audio:
        return "Please provide a reference audio file.", None
    try:
        # Step A: base speech from Edge TTS
        base_audio = asyncio.run(run_edge_tts(text, gender))
        # Step B: OpenVoice V1 tone-color conversion
        os.makedirs("outputs", exist_ok=True)
        # VAD can fail on short/noisy references; retry without it.
        try:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
        except Exception:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=False)
        source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)
        raw_output = "outputs/raw_v1.wav"
        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=raw_output,
            message="@NanoBanana",
        )
        # Step C: mastering
        final_output = apply_mastering(raw_output, mastering_style)
        return "Success (V1)!", final_output
    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}", None
# --- 5. Gradio UI ---
with gr.Blocks(title="Myanmar OpenVoice V1") as demo:
    gr.Markdown("# 🇲🇲 Myanmar Voice Cloning (OpenVoice V1)")
    gr.Markdown("V1 Model + CPU Stable Mode")
    with gr.Row():
        with gr.Column():
            # NOTE(review): the original Burmese label/placeholder text was
            # mojibake-garbled (split mid-character); rewritten in English.
            input_text = gr.Textbox(label="Enter text", lines=3, placeholder="Type here...")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Base Voice")
                style = gr.Dropdown(["Radio / Studio", "Natural", "Raw"], value="Radio / Studio", label="Mastering")
            ref = gr.Audio(label="Reference Audio", type="filepath")
            btn = gr.Button("Generate Voice", variant="primary")
        with gr.Column():
            status = gr.Textbox(label="Status")
            out = gr.Audio(label="Result")
    btn.click(predict, [input_text, ref, gender, style], [status, out])

demo.launch()