kochit committed on
Commit
e428c83
·
verified ·
1 Parent(s): 9e9ea2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -48
app.py CHANGED
@@ -1,32 +1,42 @@
1
  import os
2
  import sys
3
- import subprocess
 
 
 
 
4
  import torch
5
  import gradio as gr
6
  import edge_tts
7
  import asyncio
 
8
  from huggingface_hub import hf_hub_download
9
 
10
- # --- 1. System Setup ---
11
- print("Setting up OpenVoice...")
 
 
 
12
 
13
- # OpenVoice Repo ကို Clone လုပ်ခြင်း
14
  if not os.path.exists("OpenVoice"):
15
- subprocess.run(["git", "clone", "https://github.com/myshell-ai/OpenVoice.git"])
 
16
 
17
- # Python Path ထဲသို့ ထည့်ခြင်း
18
  sys.path.append(os.path.abspath("OpenVoice"))
19
-
20
- # Checkpoint များကို Download ဆွဲခြင်း
21
  os.makedirs("checkpoints/converter", exist_ok=True)
22
- try:
23
- print("Downloading Model Checkpoints...")
24
- hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
25
- hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
26
- except Exception as e:
27
- print(f"Download Warning: {e}")
28
 
29
- # --- 2. Import Modules ---
 
 
 
 
 
 
 
 
 
 
30
  try:
31
  from openvoice.api import ToneColorConverter
32
  from openvoice import se_extractor
@@ -35,79 +45,103 @@ except ImportError:
35
  from api import ToneColorConverter
36
  import se_extractor
37
 
38
- # --- 3. Initialize Models ---
39
- device = "cuda" if torch.cuda.is_available() else "cpu"
40
  ckpt_converter = 'checkpoints/converter'
41
-
42
  if not os.path.exists(f"{ckpt_converter}/config.json"):
43
  ckpt_converter = 'OpenVoice/checkpoints/converter'
44
 
45
- try:
46
- tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
47
- tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
48
- print("Model Loaded Successfully!")
49
- except Exception as e:
50
- print(f"Model Loading Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- # --- 4. Main Logic ---
53
  async def run_edge_tts(text, gender):
54
- # မြန်မာအသံ (Thiha = Male, Nular = Female)
55
  voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NularNeural"
56
  output_file = "temp_base.mp3"
57
  communicate = edge_tts.Communicate(text, voice)
58
  await communicate.save(output_file)
59
  return output_file
60
 
61
def predict(text, ref_audio, gender, tau):
    """Speak ``text`` with Edge TTS, then convert it towards ``ref_audio``.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected.
        ref_audio: File path of the reference recording handed to
            ``se_extractor.get_se``; must be non-empty.
        gender: Passed to ``run_edge_tts`` to select the base voice.
        tau: Value of the UI "Similarity (Tau)" slider. It is forwarded to
            ``tone_color_converter.convert`` so that the slider actually
            influences the conversion.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
        whenever the status reports a failure.
    """
    # Reject blank input up front instead of letting the TTS call fail later.
    if not text or not text.strip():
        return "စာရိုက်ထည့်ပါ", None
    if not ref_audio:
        return "Reference Audio ထည့်ပါ", None

    try:
        # Step 1: read the Burmese text aloud with Edge TTS.
        base_audio = asyncio.run(run_edge_tts(text, gender))

        # Step 2: extract the embeddings used by the converter.
        os.makedirs("outputs", exist_ok=True)

        # The reference clip goes through VAD because it may contain noise.
        try:
            target_se, _ = se_extractor.get_se(
                ref_audio, tone_color_converter, target_dir='outputs', vad=True
            )
        except Exception as e:
            return f"Reference Audio Error (Too Short?): {str(e)}", None

        # VAD stays off for the TTS output so that it does not raise errors.
        source_se, _ = se_extractor.get_se(
            base_audio, tone_color_converter, target_dir='outputs', vad=False
        )

        # Step 3: convert the base audio using the two embeddings.
        output_path = "outputs/final_mm_voice.wav"
        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=output_path,
            tau=tau,  # previously dropped, which made the slider a no-op
            message="@NanoBanana",
        )
        return "Success! (အဆင်ပြေပါပြီ)", output_path
    except Exception as e:
        # UI boundary: report the failure in the status box instead of crashing.
        return f"System Error: {str(e)}", None
 
 
93
 
94
- # --- 5. UI ---
95
- with gr.Blocks(title="Myanmar OpenVoice Fixed V2") as demo:
96
- gr.Markdown("# 🇲🇲 Myanmar Voice Cloning (Stable Version)")
97
- gr.Markdown("မြန်မာစာကို အနည်းဆုံး စာကြောင်းရှည်ရှည် (၂) ကြောင်းခန့် ရိုက်ထည့်ပေးပါ။")
 
 
 
 
 
98
 
99
  with gr.Row():
100
  with gr.Column():
101
  input_text = gr.Textbox(label="မြန်မာစာ ရိုက်ပါ", placeholder="မင်္ဂလာပါ... (စာကြောင်းရှည်ရှည်ရေးပေးပါ)", lines=3)
102
- gender = gr.Radio(["Male", "Female"], label="EdgeTTS Gender", value="Male")
103
- ref_audio = gr.Audio(label="Reference Audio (မူရင်းအသံ)", type="filepath")
104
- tau = gr.Slider(0.0, 1.0, value=0.3, label="Similarity (Tau)")
 
105
  btn = gr.Button("Generate Voice", variant="primary")
106
 
107
  with gr.Column():
108
  status = gr.Textbox(label="Status")
109
  audio = gr.Audio(label="Result")
110
 
111
- btn.click(fn=predict, inputs=[input_text, ref_audio, gender, tau], outputs=[status, audio])
112
 
113
  demo.launch()
 
1
  import os
2
  import sys
3
+
4
+ # --- SECRET FIX: Force CPU (the trick other Spaces rely on) ---
5
+ # Hide the GPU entirely so that machines with bad GPU drivers are avoided.
6
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
7
+
8
  import torch
9
  import gradio as gr
10
  import edge_tts
11
  import asyncio
12
+ import shutil
13
  from huggingface_hub import hf_hub_download
14
 
15
+ # Force PyTorch to use the CPU.
16
+ pt_device = "cpu"
17
+ torch.set_default_device(pt_device)
18
+
19
+ print(f"🚀 System Running on: {pt_device.upper()} (Stable Mode)")
20
 
21
+ # 1. Setup OpenVoice
22
  if not os.path.exists("OpenVoice"):
23
+ print("Installing OpenVoice...")
24
+ os.system("git clone https://github.com/myshell-ai/OpenVoice.git")
25
 
 
26
  sys.path.append(os.path.abspath("OpenVoice"))
 
 
27
  os.makedirs("checkpoints/converter", exist_ok=True)
 
 
 
 
 
 
28
 
29
+ # Download Checkpoints
30
def download_models():
    """Fetch the converter config and checkpoint from the Hugging Face Hub.

    Both files come from the ``myshell-ai/OpenVoice`` repository and are
    placed under the current directory. The download is best-effort: the
    first exception is printed and the function returns normally, skipping
    any file that has not been requested yet.
    """
    wanted_files = (
        "checkpoints/converter/config.json",
        "checkpoints/converter/checkpoint.pth",
    )
    try:
        for hub_path in wanted_files:
            hf_hub_download(
                repo_id="myshell-ai/OpenVoice",
                filename=hub_path,
                local_dir=".",
                local_dir_use_symlinks=False,
            )
    except Exception as e:
        print(f"Download Error: {e}")
36
+
37
+ download_models()
38
+
39
+ # Import OpenVoice Modules
40
  try:
41
  from openvoice.api import ToneColorConverter
42
  from openvoice import se_extractor
 
45
  from api import ToneColorConverter
46
  import se_extractor
47
 
48
+ # 2. Load Model (Strictly CPU)
49
+ print("Loading OpenVoice Model...")
50
  ckpt_converter = 'checkpoints/converter'
 
51
  if not os.path.exists(f"{ckpt_converter}/config.json"):
52
  ckpt_converter = 'OpenVoice/checkpoints/converter'
53
 
54
+ # The device is passed explicitly as 'cpu'.
55
+ tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=pt_device)
56
+ tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
57
+ print("Model Loaded Successfully!")
58
+
59
+ # 3. Mastering Engine
60
def apply_mastering(input_wav, style="Radio"):
    """Run an ffmpeg mastering filter chain over ``input_wav``.

    Args:
        input_wav: Path of the audio file to process.
        style: Preset name. ``"Radio / Studio (Crisp)"`` and
            ``"Natural (Soft)"`` select a filter chain. The short default
            ``"Radio"`` is treated as an alias of the crisp preset, since it
            previously matched no preset and silently skipped mastering.
            Any other value (for example ``"Raw (No Effect)"``) means
            "no processing".

    Returns:
        ``"outputs/mastered_output.wav"`` when ffmpeg succeeds. Otherwise
        ``input_wav`` is returned unchanged: unknown preset, ffmpeg not
        installed, or ffmpeg failed.
    """
    # Function-scope import: the module does not import subprocess at top level.
    import subprocess

    crisp_chain = (
        "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, "
        "equalizer=f=2000:t=q:w=1:g=2, loudnorm"
    )
    soft_chain = (
        "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, "
        "loudnorm"
    )
    presets = {
        "Radio / Studio (Crisp)": crisp_chain,
        "Radio": crisp_chain,  # alias so the declared default is not a no-op
        "Natural (Soft)": soft_chain,
    }

    filter_chain = presets.get(style)
    if filter_chain is None or not shutil.which("ffmpeg"):
        return input_wav

    output_wav = "outputs/mastered_output.wav"
    command = [
        "ffmpeg", "-y", "-i", input_wav,
        "-af", filter_chain,
        "-ar", "44100",
        output_wav,
    ]
    try:
        # ffmpeg does not create missing output directories itself.
        os.makedirs(os.path.dirname(output_wav), exist_ok=True)
        subprocess.run(command, check=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # Best effort: fall back to the unprocessed audio, but say why.
        print(f"Mastering failed, returning unprocessed audio: {e}")
        return input_wav
    return output_wav
77
 
78
+ # 4. Main Workflow
79
  async def run_edge_tts(text, gender):
 
80
  voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NularNeural"
81
  output_file = "temp_base.mp3"
82
  communicate = edge_tts.Communicate(text, voice)
83
  await communicate.save(output_file)
84
  return output_file
85
 
86
def predict(text, ref_audio, gender, mastering_style):
    """Run the full pipeline: Edge TTS, OpenVoice conversion, then mastering.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected.
        ref_audio: File path of the reference recording handed to
            ``se_extractor.get_se``; must be non-empty.
        gender: Passed to ``run_edge_tts`` to select the base voice.
        mastering_style: Preset name passed to ``apply_mastering``.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
        whenever the status starts with ``"Error:"``.
    """
    # Function-scope import: the module does not import traceback at top level.
    import traceback

    # Reject blank input up front; whitespace-only text used to reach the TTS
    # call and come back as an opaque error.
    if not text or not text.strip():
        return "Error: စာရိုက်ထည့်ပါ", None
    if not ref_audio:
        return "Error: Reference Audio ထည့်ပါ", None

    try:
        # Step A: synthesize the base audio with Edge TTS.
        base_audio = asyncio.run(run_edge_tts(text, gender))

        # Step B: OpenVoice conversion.
        os.makedirs("outputs", exist_ok=True)

        # Try the reference clip with VAD first; if that raises, log it and
        # retry with VAD disabled.
        try:
            target_se, _ = se_extractor.get_se(
                ref_audio, tone_color_converter, target_dir='outputs', vad=True
            )
        except Exception as e:
            print(f"VAD Error (Skipping VAD): {e}")
            target_se, _ = se_extractor.get_se(
                ref_audio, tone_color_converter, target_dir='outputs', vad=False
            )

        source_se, _ = se_extractor.get_se(
            base_audio, tone_color_converter, target_dir='outputs', vad=False
        )

        raw_output = "outputs/raw_mm_voice.wav"
        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=raw_output,
            message="@NanoBanana",
        )

        # Step C: optional mastering; returns raw_output unchanged on fallback.
        final_output = apply_mastering(raw_output, mastering_style)
        return "Success!", final_output

    except Exception as e:
        # UI boundary: log the full traceback and report the message in the
        # status box instead of crashing the handler.
        traceback.print_exc()
        return f"Error: {str(e)}", None
127
+
128
+ # 5. UI Setup
129
+ with gr.Blocks(title="Myanmar Voice Studio") as demo:
130
+ gr.Markdown("# 🇲🇲 Myanmar Voice Studio (CPU Stable)")
131
 
132
  with gr.Row():
133
  with gr.Column():
134
  input_text = gr.Textbox(label="မြန်မာစာ ရိုက်ပါ", placeholder="မင်္ဂလာပါ... (စာကြောင်းရှည်ရှည်ရေးပေးပါ)", lines=3)
135
+ with gr.Row():
136
+ gender = gr.Radio(["Male", "Female"], label="Base Voice", value="Male")
137
+ mastering = gr.Dropdown(["Radio / Studio (Crisp)", "Natural (Soft)", "Raw (No Effect)"], value="Radio / Studio (Crisp)", label="Mastering Effect")
138
+ ref_audio = gr.Audio(label="Reference Audio", type="filepath")
139
  btn = gr.Button("Generate Voice", variant="primary")
140
 
141
  with gr.Column():
142
  status = gr.Textbox(label="Status")
143
  audio = gr.Audio(label="Result")
144
 
145
+ btn.click(fn=predict, inputs=[input_text, ref_audio, gender, mastering], outputs=[status, audio])
146
 
147
  demo.launch()