# Voice_Colour / app.py — Hugging Face Space (author: kochit)
# Last commit: be732bb "Update app.py" (verified)
import os

# --- FORCE CPU (avoid GPU errors on CPU-only Spaces hardware) ---
# Must be set BEFORE `import torch` so CUDA device discovery finds no GPUs.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import sys
import torch
import gradio as gr
import edge_tts
import asyncio
import shutil
from huggingface_hub import hf_hub_download

# PyTorch CPU Mode — belt-and-braces: any later cuda check reports False.
torch.cuda.is_available = lambda : False

print(f"πŸš€ System Mode: CPU Only (OpenVoice V1)")
# --- 1. Setup OpenVoice V1 ---
# Clone the OpenVoice repo on first run so its python package is importable.
if not os.path.exists("OpenVoice"):
    print("Cloning OpenVoice V1...")
    os.system("git clone https://github.com/myshell-ai/OpenVoice.git")
# Make the cloned repo importable as `openvoice`.
sys.path.append(os.path.abspath("OpenVoice"))
# V1 Checkpoints Folder — created up-front so downloads have a target dir.
os.makedirs("checkpoints/converter", exist_ok=True)
# V1 Model Download (Hugging Face Source)
def download_v1_models():
    """Fetch the OpenVoice V1 converter config + checkpoint from Hugging Face.

    Best-effort: a failed download is only logged here; the later model-load
    step reports the problem to the user.
    """
    config_path = "checkpoints/converter/config.json"
    ckpt_path = "checkpoints/converter/checkpoint.pth"
    # FIX: previously only checkpoint.pth was checked, so a missing
    # config.json next to an existing checkpoint was never re-downloaded.
    if os.path.exists(config_path) and os.path.exists(ckpt_path):
        return
    print("Downloading V1 Model from Hugging Face...")
    try:
        # NOTE: local_dir_use_symlinks is deprecated (ignored by recent
        # huggingface_hub); kept for compatibility with older versions.
        # Config File
        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/config.json", local_dir=".", local_dir_use_symlinks=False)
        # Model File (V1)
        hf_hub_download(repo_id="myshell-ai/OpenVoice", filename="checkpoints/converter/checkpoint.pth", local_dir=".", local_dir_use_symlinks=False)
        print("βœ… V1 Model Downloaded!")
    except Exception as e:
        # Deliberate best-effort: the model load below surfaces a clearer error.
        print(f"Download Error: {e}")

download_v1_models()
# Import Modules
# Prefer the packaged layout (`openvoice.*`); fall back to importing the
# modules straight from the repo subfolder for checkouts without the package.
try:
    from openvoice.api import ToneColorConverter
    from openvoice import se_extractor
except ImportError:
    sys.path.append(os.path.abspath("OpenVoice/openvoice"))
    from api import ToneColorConverter
    import se_extractor
# --- 2. Load V1 Model ---
ckpt_converter = 'checkpoints/converter'
if not os.path.exists(f"{ckpt_converter}/config.json"):
    # Fallback logic: use the checkpoints bundled inside the cloned repo.
    ckpt_converter = 'OpenVoice/checkpoints/converter'
print(f"Loading V1 Model...")
try:
    # V1 Model Load (CPU)
    tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device='cpu')
    tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
    print("βœ… V1 Model Loaded Successfully!")
except Exception as e:
    # Keep the app alive; predict() checks for None and reports the failure.
    print(f"Model Load Error: {e}")
    tone_color_converter = None
# --- 3. Mastering Engine (Audio Fix) ---
def apply_mastering(input_wav, style="Radio"):
    """Post-process `input_wav` with an ffmpeg filter chain; return output path.

    Returns `input_wav` unchanged when ffmpeg is unavailable, when `style`
    selects no filter chain ("Raw" or the unused default "Radio"), or when
    ffmpeg fails.
    """
    import subprocess  # hoisted out of the try so an ImportError isn't masked

    if not shutil.which("ffmpeg"):
        return input_wav

    # FIX: local was named `filter`, shadowing the builtin.
    if style == "Radio / Studio":
        af_chain = "highpass=f=80, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=2000:t=q:w=1:g=2, loudnorm"
    elif style == "Natural":
        af_chain = "highpass=f=60, acompressor=threshold=-15dB:ratio=1.5:attack=10:release=100, loudnorm"
    else:
        # "Raw" (and any unknown style) skips mastering entirely.
        return input_wav

    output_wav = "outputs/mastered_output.wav"
    try:
        subprocess.run(["ffmpeg", "-y", "-i", input_wav, "-af", af_chain, "-ar", "44100", output_wav], check=True)
        return output_wav
    # FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # narrowed to the errors subprocess.run can actually raise here.
    except (subprocess.CalledProcessError, OSError):
        return input_wav
# --- 4. Main Workflow ---
async def run_edge_tts(text, gender):
    """Synthesize `text` with a Burmese Edge-TTS voice; return the mp3 path."""
    # FIX: the female Burmese voice id is "my-MM-NilarNeural";
    # "my-MM-NularNeural" does not exist, so Female synthesis always failed.
    voice = "my-MM-ThihaNeural" if gender == "Male" else "my-MM-NilarNeural"
    output_file = "temp_base.mp3"
    await edge_tts.Communicate(text, voice).save(output_file)
    return output_file
def predict(text, ref_audio, gender, mastering_style):
    """Full pipeline: Edge-TTS base voice -> OpenVoice tone conversion -> mastering.

    Returns (status_message, output_audio_path_or_None). All failures are
    reported through the status string so the Gradio UI never crashes.
    """
    if tone_color_converter is None:
        return "System Error: Model failed to load.", None
    if not text:
        return "α€…α€¬α€›α€­α€―α€€α€Ία€‘α€Šα€·α€Ία€•α€«", None
    if not ref_audio:
        return "Reference Audio α€‘α€Šα€·α€Ία€•α€«", None
    try:
        # Step A: Edge TTS — synthesize the base voice for the chosen gender.
        base_audio = asyncio.run(run_edge_tts(text, gender))

        # Step B: OpenVoice V1 tone-colour conversion.
        os.makedirs("outputs", exist_ok=True)

        # VAD Handling — try the VAD-based speaker embedding first and fall
        # back to whole-file extraction if VAD fails on the reference clip.
        # FIX: was a bare `except:` (also swallowed KeyboardInterrupt).
        try:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=True)
        except Exception:
            target_se, _ = se_extractor.get_se(ref_audio, tone_color_converter, target_dir='outputs', vad=False)
        source_se, _ = se_extractor.get_se(base_audio, tone_color_converter, target_dir='outputs', vad=False)

        raw_output = "outputs/raw_v1.wav"
        tone_color_converter.convert(
            audio_src_path=base_audio,
            src_se=source_se,
            tgt_se=target_se,
            output_path=raw_output,
            message="@NanoBanana"
        )

        # Step C: Mastering (optional ffmpeg post-processing).
        final_output = apply_mastering(raw_output, mastering_style)
        return "Success (V1)!", final_output
    except Exception as e:
        # Top-level boundary: log the traceback, surface the message in the UI.
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}", None
# UI — Gradio front-end: text + reference audio in, status + cloned audio out.
with gr.Blocks(title="Myanmar OpenVoice V1") as demo:
    gr.Markdown("# πŸ‡²πŸ‡² Myanmar Voice Cloning (OpenVoice V1)")
    gr.Markdown("V1 Model + CPU Stable Mode")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="မြန်မာစာ ရိုက်ပါ", lines=3, placeholder="α€™α€„α€Ία€Ήα€‚α€œα€¬α€•α€«...")
            with gr.Row():
                gender = gr.Radio(["Male", "Female"], value="Male", label="Base Voice")
                style = gr.Dropdown(["Radio / Studio", "Natural", "Raw"], value="Radio / Studio", label="Mastering")
            # type="filepath" hands predict() a path, matching se_extractor.get_se.
            ref = gr.Audio(label="Reference Audio", type="filepath")
            btn = gr.Button("Generate Voice", variant="primary")
        with gr.Column():
            status = gr.Textbox(label="Status")
            out = gr.Audio(label="Result")
    # predict(text, ref_audio, gender, mastering_style) -> (status, audio path)
    btn.click(predict, [input_text, ref, gender, style], [status, out])
demo.launch()