Spaces: kochit/Voice_Colour

Sleeping

App Files Community

kochit committed on Feb 11

Commit

39cb2dd

verified ·

1 Parent(s): e86033f

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -79

app.py CHANGED Viewed

@@ -1,40 +1,32 @@
 import os
-# --- Force CPU (GPU Error ရှောင်ရန်) ---
-os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 import sys
 import torch
 import gradio as gr
 import edge_tts
 import asyncio
-import shutil
 from huggingface_hub import hf_hub_download
-# PyTorch CPU Mode
-torch.cuda.is_available = lambda : False
-print("--- Starting OpenVoice V2 (Myanmar Edition) ---")
-# 1. OpenVoice Setup
 if not os.path.exists("OpenVoice"):
-    os.system("git clone https://github.com/myshell-ai/OpenVoice.git")
 sys.path.append(os.path.abspath("OpenVoice"))
-os.makedirs("checkpoints_v2", exist_ok=True)
-# 2. Download V2 Checkpoints (V2 Model အစစ်)
-def download_models():
-    try:
-        # V2 Converter Model
-        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints_v2/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
-        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints_v2/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
-        print("V2 Model Downloaded!")
-    except Exception as e:
-        print(f"Download Error: {e}")
-download_models()
-# Import OpenVoice
 try:
     from openvoice.api import ToneColorConverter
     from openvoice import se_extractor
@@ -43,96 +35,79 @@ except ImportError:
     from api import ToneColorConverter
     import se_extractor
-# 3. Load V2 Model
-# V2 path အတိုင်း ညွှန်ပေးရပါမယ်
-ckpt_converter = 'checkpoints_v2/converter'
 if not os.path.exists(f"{ckpt_converter}/config.json"):
-    # Fallback if download path varies
-    ckpt_converter = 'OpenVoice/checkpoints_v2/converter'
-print("Loading V2 Model...")
 try:
-    # V2 requires 'device' argument
-    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device='cpu')
     tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
-    print("V2 Model Loaded Successfully!")
 except Exception as e:
-    print(f"Model Load Error: {e}")
-# 4. Mastering Engine
-def apply_mastering(input_wav, style="Radio"):
-    if not shutil.which("ffmpeg"): return input_wav
-    output_wav = "outputs/mastered_output.wav"
-    if style == "Radio / Studio":
-        filter = "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=2000:t=q:w=1:g=2, loudnorm"
-    elif style == "Natural":
-        filter = "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, loudnorm"
-    else: return input_wav
-    try:
-        import subprocess
-        subprocess.run(["ffmpeg", "-y", "-i", input_wav, "-af", filter, "-ar", "44100", output_wav], check=True)
-        return output_wav
-    except: return input_wav
-# 5. Main Workflow
 async def run_edge_tts(text, gender):
     voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NularNeural"
     output_file = "temp_base.mp3"
-    await edge_tts.Communicate(text, voice).save(output_file)
     return output_file
-def predict(text, ref_audio, gender, mastering_style):
-    if not text or not ref_audio: return "Error: Input Missing", None
     try:
-        # A. Edge TTS
         base_audio = asyncio.run(run_edge_tts(text, gender))
-        # B. OpenVoice V2 Conversion
         os.makedirs("outputs", exist_ok=True)
-        # VAD Handling
         try:
             target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
-        except:
-            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=False)
         source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)
-        raw_output = "outputs/raw_v2.wav"
         tone_color_converter.convert(
             audio_src_path=base_audio,
             src_se=source_se,
             tgt_se=target_se,
-            output_path=raw_output,
             message="@NanoBanana"
         )
-        # C. Mastering
-        final_output = apply_mastering(raw_output, mastering_style)
-        return "Success (V2)!", final_output
     except Exception as e:
-        return f"Error: {str(e)}", None
-# UI
-with gr.Blocks(title="Myanmar OpenVoice V2") as demo:
-    gr.Markdown("# 🇲🇲 Myanmar Voice Cloning (OpenVoice V2)")
     with gr.Row():
         with gr.Column():
-            input_text = gr.Textbox(label="Text", lines=3)
-            with gr.Row():
-                gender = gr.Radio(["Male", "Female"], value="Male", label="Base Voice")
-                style = gr.Dropdown(["Radio / Studio", "Natural", "Raw"], value="Radio / Studio", label="Mastering")
-            ref = gr.Audio(label="Ref Audio", type="filepath")
-            btn = gr.Button("Generate", variant="primary")
         with gr.Column():
             status = gr.Textbox(label="Status")
-            out = gr.Audio(label="Result")
-    btn.click(predict, [input_text, ref, gender, style], [status, out])
 demo.launch()

 import os
 import sys
+import subprocess
 import torch
 import gradio as gr
 import edge_tts
 import asyncio
 from huggingface_hub import hf_hub_download
+# --- 1. System Setup ---
+print("Setting up OpenVoice...")
+# OpenVoice Repo ကို Clone လုပ်ခြင်း
 if not os.path.exists("OpenVoice"):
+    subprocess.run(["git", "clone", "https://github.com/myshell-ai/OpenVoice.git"])
+# Python Path ထဲသို့ ထည့်ခြင်း
 sys.path.append(os.path.abspath("OpenVoice"))
+# Checkpoint များကို Download ဆွဲခြင်း
+os.makedirs("checkpoints/converter", exist_ok=True)
+try:
+    print("Downloading Model Checkpoints...")
+    hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
+    hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
+except Exception as e:
+    print(f"Download Warning: {e}")
+# --- 2. Import Modules ---
 try:
     from openvoice.api import ToneColorConverter
     from openvoice import se_extractor
     from api import ToneColorConverter
     import se_extractor
+# --- 3. Initialize Models ---
+device = "cuda" if torch.cuda.is_available() else "cpu"
+ckpt_converter = 'checkpoints/converter'
 if not os.path.exists(f"{ckpt_converter}/config.json"):
+    ckpt_converter = 'OpenVoice/checkpoints/converter'
 try:
+    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
     tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
+    print("Model Loaded Successfully!")
 except Exception as e:
+    print(f"Model Loading Error: {e}")
+# --- 4. Main Logic ---
 async def run_edge_tts(text, gender):
+    # မြန်မာအသံ (Thiha = Male, Nular = Female)
     voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NularNeural"
     output_file = "temp_base.mp3"
+    communicate = edge_tts.Communicate(text, voice)
+    await communicate.save(output_file)
     return output_file
+def predict(text, ref_audio, gender, tau):
+    if not text: return "စာရိုက်ထည့်ပါ", None
+    if not ref_audio: return "Reference Audio ထည့်ပါ", None
     try:
+        # Step 1: Edge TTS ဖြင့် မြန်မာစာဖတ်
         base_audio = asyncio.run(run_edge_tts(text, gender))
+        # Step 2: Tone Extract
         os.makedirs("outputs", exist_ok=True)
+        # Reference Audio ကိုတော့ VAD ခံမည် (ဆူညံသံပါနိုင်လို့)
         try:
             target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
+        except Exception as e:
+             return f"Reference Audio Error (Too Short?): {str(e)}", None
+        # Base Audio (TTS) ကို VAD ပိတ်ထားမည် (Error မတက်အောင်)
         source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)
+        # Step 3: Convert
+        output_path = "outputs/final_mm_voice.wav"
         tone_color_converter.convert(
             audio_src_path=base_audio,
             src_se=source_se,
             tgt_se=target_se,
+            output_path=output_path,
             message="@NanoBanana"
         )
+        return "Success! (အဆင်ပြေပါပြီ)", output_path
     except Exception as e:
+        return f"System Error: {str(e)}", None
+# --- 5. UI ---
+with gr.Blocks(title="Myanmar OpenVoice Fixed V2") as demo:
+    gr.Markdown("# 🇲🇲 Myanmar Voice Cloning (Stable Version)")
+    gr.Markdown("မြန်မာစာကို အနည်းဆုံး စာကြောင်းရှည်ရှည် (၂) ကြောင်းခန့် ရိုက်ထည့်ပေးပါ။")
     with gr.Row():
         with gr.Column():
+            input_text = gr.Textbox(label="မြန်မာစာ ရိုက်ပါ", placeholder="မင်္ဂလာပါ... (စာကြောင်းရှည်ရှည်ရေးပေးပါ)", lines=3)
+            gender = gr.Radio(["Male", "Female"], label="EdgeTTS Gender", value="Male")
+            ref_audio = gr.Audio(label="Reference Audio (မူရင်းအသံ)", type="filepath")
+            tau = gr.Slider(0.0, 1.0, value=0.3, label="Similarity (Tau)")
+            btn = gr.Button("Generate Voice", variant="primary")
         with gr.Column():
             status = gr.Textbox(label="Status")
+            audio = gr.Audio(label="Result")
+    btn.click(fn=predict, inputs=[input_text, ref_audio, gender, tau], outputs=[status, audio])
 demo.launch()