kochit committed on
Commit
39cb2dd
·
verified ·
1 Parent(s): e86033f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -79
app.py CHANGED
@@ -1,40 +1,32 @@
1
  import os
2
- # --- Force CPU (GPU Error ရှောင်ရန်) ---
3
- os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
4
-
5
  import sys
 
6
  import torch
7
  import gradio as gr
8
  import edge_tts
9
  import asyncio
10
- import shutil
11
  from huggingface_hub import hf_hub_download
12
 
13
- # PyTorch CPU Mode
14
- torch.cuda.is_available = lambda : False
15
-
16
- print("--- Starting OpenVoice V2 (Myanmar Edition) ---")
17
 
18
- # 1. OpenVoice Setup
19
  if not os.path.exists("OpenVoice"):
20
- os.system("git clone https://github.com/myshell-ai/OpenVoice.git")
21
 
 
22
  sys.path.append(os.path.abspath("OpenVoice"))
23
- os.makedirs("checkpoints_v2", exist_ok=True)
24
-
25
- # 2. Download V2 Checkpoints (V2 Model အစစ်)
26
- def download_models():
27
- try:
28
- # V2 Converter Model
29
- hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints_v2/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
30
- hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints_v2/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
31
- print("V2 Model Downloaded!")
32
- except Exception as e:
33
- print(f"Download Error: {e}")
34
 
35
- download_models()
 
 
 
 
 
 
 
36
 
37
- # Import OpenVoice
38
  try:
39
  from openvoice.api import ToneColorConverter
40
  from openvoice import se_extractor
@@ -43,96 +35,79 @@ except ImportError:
43
  from api import ToneColorConverter
44
  import se_extractor
45
 
46
- # 3. Load V2 Model
47
- # V2 path အတိုင်း ညွှန်ပေးရပါမယ်
48
- ckpt_converter = 'checkpoints_v2/converter'
 
49
  if not os.path.exists(f"{ckpt_converter}/config.json"):
50
- # Fallback if download path varies
51
- ckpt_converter = 'OpenVoice/checkpoints_v2/converter'
52
 
53
- print("Loading V2 Model...")
54
  try:
55
- # V2 requires 'device' argument
56
- tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device='cpu')
57
  tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
58
- print("V2 Model Loaded Successfully!")
59
  except Exception as e:
60
- print(f"Model Load Error: {e}")
61
 
62
- # 4. Mastering Engine
63
- def apply_mastering(input_wav, style="Radio"):
64
- if not shutil.which("ffmpeg"): return input_wav
65
- output_wav = "outputs/mastered_output.wav"
66
-
67
- if style == "Radio / Studio":
68
- filter = "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=2000:t=q:w=1:g=2, loudnorm"
69
- elif style == "Natural":
70
- filter = "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, loudnorm"
71
- else: return input_wav
72
-
73
- try:
74
- import subprocess
75
- subprocess.run(["ffmpeg", "-y", "-i", input_wav, "-af", filter, "-ar", "44100", output_wav], check=True)
76
- return output_wav
77
- except: return input_wav
78
-
79
- # 5. Main Workflow
80
  async def run_edge_tts(text, gender):
 
81
  voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NularNeural"
82
  output_file = "temp_base.mp3"
83
- await edge_tts.Communicate(text, voice).save(output_file)
 
84
  return output_file
85
 
86
- def predict(text, ref_audio, gender, mastering_style):
87
- if not text or not ref_audio: return "Error: Input Missing", None
 
88
 
89
  try:
90
- # A. Edge TTS
91
  base_audio = asyncio.run(run_edge_tts(text, gender))
92
 
93
- # B. OpenVoice V2 Conversion
94
  os.makedirs("outputs", exist_ok=True)
95
 
96
- # VAD Handling
97
  try:
98
  target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
99
- except:
100
- target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=False)
101
 
 
102
  source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)
103
 
104
- raw_output = "outputs/raw_v2.wav"
 
105
  tone_color_converter.convert(
106
  audio_src_path=base_audio,
107
  src_se=source_se,
108
  tgt_se=target_se,
109
- output_path=raw_output,
110
  message="@NanoBanana"
111
  )
112
-
113
- # C. Mastering
114
- final_output = apply_mastering(raw_output, mastering_style)
115
- return "Success (V2)!", final_output
116
-
117
  except Exception as e:
118
- return f"Error: {str(e)}", None
119
 
120
- # UI
121
- with gr.Blocks(title="Myanmar OpenVoice V2") as demo:
122
- gr.Markdown("# 🇲🇲 Myanmar Voice Cloning (OpenVoice V2)")
 
123
 
124
  with gr.Row():
125
  with gr.Column():
126
- input_text = gr.Textbox(label="Text", lines=3)
127
- with gr.Row():
128
- gender = gr.Radio(["Male", "Female"], value="Male", label="Base Voice")
129
- style = gr.Dropdown(["Radio / Studio", "Natural", "Raw"], value="Radio / Studio", label="Mastering")
130
- ref = gr.Audio(label="Ref Audio", type="filepath")
131
- btn = gr.Button("Generate", variant="primary")
132
  with gr.Column():
133
  status = gr.Textbox(label="Status")
134
- out = gr.Audio(label="Result")
135
-
136
- btn.click(predict, [input_text, ref, gender, style], [status, out])
137
 
138
  demo.launch()
 
import os
import sys
import subprocess

import torch
import gradio as gr
import edge_tts
import asyncio
from huggingface_hub import hf_hub_download

# --- 1. System Setup ---
print("Setting up OpenVoice...")

# Clone the OpenVoice repository if it is not already present.
if not os.path.exists("OpenVoice"):
    # check=True: fail loudly if the clone fails instead of continuing with a
    # missing repo, which would otherwise surface later as a confusing ImportError.
    subprocess.run(
        ["git", "clone", "https://github.com/myshell-ai/OpenVoice.git"],
        check=True,
    )

# Make the cloned package importable.
sys.path.append(os.path.abspath("OpenVoice"))

# Download the tone-color-converter checkpoints (config + weights).
os.makedirs("checkpoints/converter", exist_ok=True)
try:
    print("Downloading Model Checkpoints...")
    hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
    hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
except Exception as e:
    # Best-effort: the fallback checkpoint path below may still find the files.
    print(f"Download Warning: {e}")

# --- 2. Import Modules ---
# Prefer the installed-package layout; fall back to the flat repo layout.
try:
    from openvoice.api import ToneColorConverter
    from openvoice import se_extractor
except ImportError:
    from api import ToneColorConverter
    import se_extractor

# --- 3. Initialize Models ---
device = "cuda" if torch.cuda.is_available() else "cpu"
ckpt_converter = 'checkpoints/converter'

if not os.path.exists(f"{ckpt_converter}/config.json"):
    # Fallback: checkpoints bundled inside the cloned repository.
    ckpt_converter = 'OpenVoice/checkpoints/converter'

try:
    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
    tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
    print("Model Loaded Successfully!")
except Exception as e:
    # Keep the app alive so the UI can still render and report errors.
    print(f"Model Loading Error: {e}")
async def run_edge_tts(text, gender):
    """Synthesize *text* with Microsoft Edge TTS and return the mp3 path.

    Myanmar neural voices: Thiha (male) / Nular (female).
    """
    if gender == "Male":
        voice = "my-MM-ThihaNeural"
    else:
        voice = "my-MM-NularNeural"
    out_path = "temp_base.mp3"
    await edge_tts.Communicate(text, voice).save(out_path)
    return out_path
def predict(text, ref_audio, gender, tau):
    """Full pipeline: Edge-TTS base speech -> OpenVoice tone-color conversion.

    Args:
        text: Myanmar text to synthesize.
        ref_audio: path to the reference (target-voice) audio file.
        gender: "Male" or "Female" — selects the Edge TTS base voice.
        tau: similarity strength from the UI slider, forwarded to the converter.

    Returns:
        (status_message, output_audio_path_or_None)
    """
    # Guard clauses: both inputs are required.
    if not text: return "စာရိုက်ထည့်ပါ", None
    if not ref_audio: return "Reference Audio ထည့်ပါ", None

    try:
        # Step 1: synthesize the Myanmar text with Edge TTS.
        base_audio = asyncio.run(run_edge_tts(text, gender))

        # Step 2: extract speaker embeddings.
        os.makedirs("outputs", exist_ok=True)

        # Reference audio goes through VAD (it may contain noise/silence).
        try:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
        except Exception as e:
            return f"Reference Audio Error (Too Short?): {str(e)}", None

        # TTS output is clean speech; skip VAD to avoid spurious failures.
        source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)

        # Step 3: convert tone color.
        output_path = "outputs/final_mm_voice.wav"
        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=output_path,
            # FIX: the slider value was previously accepted but never used;
            # ToneColorConverter.convert takes a `tau` similarity argument.
            tau=tau,
            message="@NanoBanana"
        )
        return "Success! (အဆင်ပြေပါပြီ)", output_path

    except Exception as e:
        return f"System Error: {str(e)}", None
# --- 5. UI ---
# Gradio front-end: Myanmar text + reference audio in, status + cloned audio out.
with gr.Blocks(title="Myanmar OpenVoice Fixed V2") as demo:
    gr.Markdown("# 🇲🇲 Myanmar Voice Cloning (Stable Version)")
    gr.Markdown("မြန်မာစာကို အနည်းဆုံး စာကြောင်းရှည်ရှည် (၂) ကြောင်းခန့် ရိုက်ထည့်ပေးပါ။")

    with gr.Row():
        # Left column: all inputs.
        with gr.Column():
            input_text = gr.Textbox(lines=3, label="မြန်မာစာ ရိုက်ပါ", placeholder="မင်္ဂလာပါ... (စာကြောင်းရှည်ရှည်ရေးပေးပါ)")
            gender = gr.Radio(choices=["Male", "Female"], value="Male", label="EdgeTTS Gender")
            ref_audio = gr.Audio(type="filepath", label="Reference Audio (မူရင်းအသံ)")
            tau = gr.Slider(0.0, 1.0, value=0.3, label="Similarity (Tau)")
            btn = gr.Button("Generate Voice", variant="primary")

        # Right column: results.
        with gr.Column():
            status = gr.Textbox(label="Status")
            audio = gr.Audio(label="Result")

    btn.click(fn=predict, inputs=[input_text, ref_audio, gender, tau], outputs=[status, audio])

demo.launch()