import os # --- FORCE CPU (GPU Error ရှောင်ရန်) --- os.environ["CUDA_VISIBLE_DEVICES"] = "-1" import sys import torch import gradio as gr import edge_tts import asyncio import shutil from huggingface_hub import hf_hub_download # PyTorch CPU Mode torch.cuda.is_available = lambda : False print(f"🚀 System Mode: CPU Only (OpenVoice V1)") # --- 1. Setup OpenVoice V1 --- if not os.path.exists("OpenVoice"): print("Cloning OpenVoice V1...") os.system("git clone https://github.com/myshell-ai/OpenVoice.git") sys.path.append(os.path.abspath("OpenVoice")) # V1 Checkpoints Folder os.makedirs("checkpoints/converter", exist_ok=True) # V1 Model Download (Hugging Face Source) def download_v1_models(): if not os.path.exists("checkpoints/converter/checkpoint.pth"): print("Downloading V1 Model from Hugging Face...") try: # Config File hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/config.json", local_dir=".", local_dir_use_symlinks=False) # Model File (V1) hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False) print("✅ V1 Model Downloaded!") except Exception as e: print(f"Download Error: {e}") download_v1_models() # Import Modules try: from openvoice.api import ToneColorConverter from openvoice import se_extractor except ImportError: sys.path.append(os.path.abspath("OpenVoice/openvoice")) from api import ToneColorConverter import se_extractor # --- 2. 
# --- 2. Load V1 Model ---
ckpt_converter = 'checkpoints/converter'
if not os.path.exists(f"{ckpt_converter}/config.json"):
    # Fallback logic: use the config shipped inside the cloned repository.
    ckpt_converter = 'OpenVoice/checkpoints/converter'

print(f"Loading V1 Model...")
try:
    # V1 Model Load (CPU)
    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device='cpu')
    tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
    print("✅ V1 Model Loaded Successfully!")
except Exception as e:
    print(f"Model Load Error: {e}")
    tone_color_converter = None  # sentinel checked by predict()


# --- 3. Mastering Engine (Audio Fix) ---
def apply_mastering(input_wav, style="Radio"):
    """Post-process a WAV file with an ffmpeg filter chain.

    Args:
        input_wav: Path of the audio file to master.
        style: "Radio / Studio" or "Natural" to apply mastering; any other
            value (including "Raw" and the historical default "Radio", which
            matches no branch) returns *input_wav* unchanged.

    Returns:
        Path to the mastered file, or *input_wav* untouched when ffmpeg is
        missing, the style applies no processing, or ffmpeg fails
        (best-effort: mastering is optional polish, never fatal).
    """
    import subprocess

    # No ffmpeg on PATH -> nothing we can do; hand back the original.
    if not shutil.which("ffmpeg"):
        return input_wav

    output_wav = "outputs/mastered_output.wav"
    if style == "Radio / Studio":
        af_chain = "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=2000:t=q:w=1:g=2, loudnorm"
    elif style == "Natural":
        af_chain = "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, loudnorm"
    else:
        return input_wav

    try:
        # Ensure the destination directory exists before ffmpeg writes to it.
        os.makedirs("outputs", exist_ok=True)
        subprocess.run(["ffmpeg", "-y", "-i", input_wav, "-af", af_chain, "-ar", "44100", output_wav], check=True)
        return output_wav
    except (subprocess.CalledProcessError, OSError):
        # ffmpeg error or filesystem problem: fall back to the unmastered file.
        return input_wav
# --- 4. Main Workflow ---
async def run_edge_tts(text, gender):
    """Synthesize Burmese base speech with Edge TTS.

    Args:
        text: Text to speak.
        gender: "Male" selects the Thiha voice; anything else selects Nular.

    Returns:
        Path of the generated mp3 file.
    """
    voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NularNeural"
    output_file = "temp_base.mp3"
    await edge_tts.Communicate(text, voice).save(output_file)
    return output_file


def predict(text, ref_audio, gender, mastering_style):
    """Full pipeline: Edge TTS -> OpenVoice V1 tone conversion -> mastering.

    Args:
        text: Burmese text to synthesize.
        ref_audio: Path of the reference voice clip to clone.
        gender: Base TTS voice selector ("Male" / "Female").
        mastering_style: Style name forwarded to apply_mastering().

    Returns:
        (status message, output audio path or None) for the Gradio outputs.
    """
    if tone_color_converter is None:
        return "System Error: Model failed to load.", None
    if not text:
        return "စာရိုက်ထည့်ပါ", None
    if not ref_audio:
        return "Reference Audio ထည့်ပါ", None
    try:
        # Step A: Edge TTS
        base_audio = asyncio.run(run_edge_tts(text, gender))

        # Step B: OpenVoice V1
        os.makedirs("outputs", exist_ok=True)
        # VAD Handling: extraction with voice-activity detection can fail on
        # some clips, so retry without it. Narrowed from a bare `except:` so
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
        except Exception:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=False)

        source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)

        raw_output = "outputs/raw_v1.wav"
        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=raw_output,
            message="@NanoBanana"
        )

        # Step C: Mastering
        final_output = apply_mastering(raw_output, mastering_style)
        return "Success (V1)!", final_output
    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}", None


# UI
with gr.Blocks(title="Myanmar OpenVoice V1") as demo:
    gr.Markdown("# 🇲🇲 Myanmar Voice Cloning (OpenVoice V1)")
    gr.Markdown("V1 Model + CPU Stable Mode")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="မြန်မာစာ ရိုက်ပါ", lines=3, placeholder="မင်္ဂလာပါ...")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Base Voice")
                style = gr.Dropdown(["Radio / Studio", "Natural", "Raw"], value="Radio / Studio", label="Mastering")
            ref = gr.Audio(label="Reference Audio", type="filepath")
            btn = gr.Button("Generate Voice", variant="primary")
        with gr.Column():
            status = gr.Textbox(label="Status")
            out = gr.Audio(label="Result")
    # Wire the button inside the Blocks context:
    # predict(text, reference audio, gender, mastering style) -> (status, audio).
    btn.click(predict, [input_text, ref, gender, style], [status, out])

# Start the Gradio server.
demo.launch()