File size: 5,538 Bytes
96d0f22
be732bb
18a5a62
 
0ec06b3
96d0f22
 
 
 
18a5a62
cc08a38
96d0f22
f0b4364
18a5a62
be732bb
96d0f22
be732bb
96d0f22
be732bb
18a5a62
96d0f22
 
e428c83
be732bb
 
e428c83
be732bb
 
 
 
18a5a62
be732bb
 
 
 
 
18a5a62
 
 
be732bb
18a5a62
be732bb
96d0f22
 
 
 
 
 
 
 
be732bb
 
cc08a38
be732bb
 
96d0f22
be732bb
0ec06b3
be732bb
18a5a62
0ec06b3
be732bb
0ec06b3
be732bb
f0b4364
18a5a62
be732bb
18a5a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ec06b3
be732bb
96d0f22
 
 
18a5a62
96d0f22
 
18a5a62
f0b4364
be732bb
39cb2dd
 
96d0f22
 
cc08a38
96d0f22
 
be732bb
96d0f22
 
be732bb
96d0f22
 
18a5a62
 
96d0f22
 
 
be732bb
18a5a62
96d0f22
 
 
 
18a5a62
96d0f22
 
18a5a62
 
 
be732bb
18a5a62
e428c83
f0b4364
 
18a5a62
e428c83
18a5a62
be732bb
 
 
96d0f22
 
 
18a5a62
 
 
 
 
39cb2dd
96d0f22
 
18a5a62
 
 
96d0f22
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
# --- FORCE CPU (to avoid GPU errors) ---
# Must be set BEFORE torch is imported so CUDA never sees a device.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import sys
import torch
import gradio as gr
import edge_tts
import asyncio
import shutil
from huggingface_hub import hf_hub_download

# PyTorch CPU Mode
# Belt-and-braces: monkey-patch CUDA detection so downstream libraries
# (OpenVoice) always take their CPU code path.
torch.cuda.is_available = lambda : False
print(f"πŸš€ System Mode: CPU Only (OpenVoice V1)")

# --- 1. Setup OpenVoice V1 ---
# First run: clone the OpenVoice repo so its modules can be imported below.
if not os.path.exists("OpenVoice"):
    print("Cloning OpenVoice V1...")
    os.system("git clone https://github.com/myshell-ai/OpenVoice.git")

# Make the cloned repo importable (provides `openvoice.api`, `se_extractor`).
sys.path.append(os.path.abspath("OpenVoice"))

# V1 Checkpoints Folder
os.makedirs("checkpoints/converter", exist_ok=True)
# V1 Model Download (Hugging Face Source)
# V1 Model Download (Hugging Face Source)
def download_v1_models():
    """Fetch the OpenVoice V1 converter config and checkpoint from Hugging Face.

    Files land in ./checkpoints/converter/; the call is a no-op when both
    files already exist. Errors are logged instead of raised so the app can
    still start (the model-loading stage reports its own failure cleanly).
    """
    config_path = "checkpoints/converter/config.json"
    ckpt_path = "checkpoints/converter/checkpoint.pth"
    # Bug fix: previously only checkpoint.pth was checked, so a missing
    # config.json would never be (re-)downloaded.
    if os.path.exists(config_path) and os.path.exists(ckpt_path):
        return
    print("Downloading V1 Model from Hugging Face...")
    try:
        # Config File
        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
        # Model File (V1)
        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
        print("βœ… V1 Model Downloaded!")
    except Exception as e:
        # Best-effort: log and continue; model load will fail gracefully.
        print(f"Download Error: {e}")

download_v1_models()

# Import Modules
# Prefer the package-style layout (openvoice.*); if that fails, fall back to
# importing directly from the inner directory of the cloned repo.
try:
    from openvoice.api import ToneColorConverter
    from openvoice import se_extractor
except ImportError:
    sys.path.append(os.path.abspath("OpenVoice/openvoice"))
    from api import ToneColorConverter
    import se_extractor

# --- 2. Load V1 Model ---
ckpt_converter = 'checkpoints/converter'
if not os.path.exists(f"{ckpt_converter}/config.json"):
    # Fallback logic: use checkpoints bundled inside the cloned repo when
    # the Hugging Face download did not produce a local config.
    ckpt_converter = 'OpenVoice/checkpoints/converter'

print(f"Loading V1 Model...")
try:
    # V1 Model Load (CPU)
    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device='cpu')
    tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
    print("βœ… V1 Model Loaded Successfully!")
except Exception as e:
    print(f"Model Load Error: {e}")
    # Leave the converter unset; predict() reports a clean error message
    # instead of the whole app crashing at startup.
    tone_color_converter = None

# --- 3. Mastering Engine (Audio Fix) ---
def apply_mastering(input_wav, style="Radio"):
    """Post-process `input_wav` with an ffmpeg filter chain.

    Args:
        input_wav: path to the audio file to master.
        style: "Radio / Studio" or "Natural" selects a filter chain;
               any other value (e.g. "Raw") skips mastering.

    Returns:
        Path to the mastered file, or `input_wav` unchanged when ffmpeg is
        unavailable, the style requests no mastering, or mastering fails.
    """
    import subprocess

    if not shutil.which("ffmpeg"):
        return input_wav

    if style == "Radio / Studio":
        # High-pass rumble cut, gentle compression, presence EQ, loudness norm.
        af_chain = "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=2000:t=q:w=1:g=2, loudnorm"
    elif style == "Natural":
        af_chain = "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, loudnorm"
    else:
        # "Raw" (or unknown) style: return the audio untouched.
        return input_wav

    # Bug fix: ffmpeg does not create missing output directories itself.
    os.makedirs("outputs", exist_ok=True)
    output_wav = "outputs/mastered_output.wav"

    try:
        # Narrowed from a bare `except:`; OSError covers a vanished binary.
        subprocess.run(
            ["ffmpeg", "-y", "-i", input_wav, "-af", af_chain, "-ar", "44100", output_wav],
            check=True,
        )
        return output_wav
    except (subprocess.CalledProcessError, OSError):
        # Best-effort: fall back to the unmastered audio.
        return input_wav

# --- 4. Main Workflow ---
async def run_edge_tts(text, gender):
    """Synthesize Myanmar speech for `text` via Microsoft Edge TTS.

    Args:
        text: text to speak.
        gender: "Male" selects Thiha, anything else selects Nilar.

    Returns:
        Path to the saved MP3 ("temp_base.mp3").
    """
    # Bug fix: the female Burmese voice is "my-MM-NilarNeural";
    # "my-MM-NularNeural" is not a valid Edge TTS voice name and made
    # female synthesis fail.
    voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NilarNeural"
    output_file = "temp_base.mp3"
    await edge_tts.Communicate(text, voice).save(output_file)
    return output_file

def predict(text, ref_audio, gender, mastering_style):
    """Run the full voice-cloning pipeline for the Gradio UI.

    Steps: (A) synthesize a Myanmar base voice with Edge TTS, (B) extract
    speaker embeddings and convert the base audio toward the reference
    speaker with OpenVoice V1, (C) optionally master the result via ffmpeg.

    Args:
        text: Myanmar text to speak.
        ref_audio: filepath of the reference speaker recording.
        gender: "Male" or "Female" base-voice selector.
        mastering_style: "Radio / Studio", "Natural" or "Raw".

    Returns:
        (status_message, audio_path) — audio_path is None on failure.
    """
    if tone_color_converter is None:
        return "System Error: Model failed to load.", None
    if not text:
        return "α€…α€¬α€›α€­α€―α€€α€Ία€‘α€Šα€·α€Ία€•α€«", None
    if not ref_audio:
        return "Reference Audio α€‘α€Šα€·α€Ία€•α€«", None

    try:
        # Step A: Edge TTS base synthesis.
        base_audio = asyncio.run(run_edge_tts(text, gender))

        # Step B: OpenVoice V1 tone-color conversion.
        os.makedirs("outputs", exist_ok=True)

        # VAD Handling: VAD can fail on short/noisy references; retry without.
        # Fix: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate.
        try:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
        except Exception:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=False)

        source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)

        raw_output = "outputs/raw_v1.wav"

        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=raw_output,
            message="@NanoBanana"
        )

        # Step C: Mastering.
        final_output = apply_mastering(raw_output, mastering_style)
        return "Success (V1)!", final_output

    except Exception as e:
        # Top-level boundary: surface the error in the UI instead of crashing.
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}", None

# UI
# Gradio front-end: text + reference audio in, status + cloned audio out.
with gr.Blocks(title="Myanmar OpenVoice V1") as demo:
    gr.Markdown("# πŸ‡²πŸ‡² Myanmar Voice Cloning (OpenVoice V1)")
    gr.Markdown("V1 Model + CPU Stable Mode")
    
    with gr.Row():
        with gr.Column():
            # Left column: all user inputs.
            input_text = gr.Textbox(label="မြန်မာစာ ရိုက်ပါ", lines=3, placeholder="α€™α€„α€Ία€Ήα€‚α€œα€¬α€•α€«...")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Base Voice")
                style = gr.Dropdown(["Radio / Studio", "Natural", "Raw"], value="Radio / Studio", label="Mastering")
            ref = gr.Audio(label="Reference Audio", type="filepath")
            btn = gr.Button("Generate Voice", variant="primary")
        with gr.Column():
            # Right column: pipeline status and the generated result.
            status = gr.Textbox(label="Status")
            out = gr.Audio(label="Result")
            
    # Wire the button to the full pipeline; outputs map to (status, out).
    btn.click(predict, [input_text, ref, gender, style], [status, out])

demo.launch()