Spaces:

workebridge
/

omon

Build error

App Files Files Community

omon / app.py

workebridge

Update app.py

ceca7a3 verified about 1 month ago

raw

history blame contribute delete

4.84 kB

	import os
	import tempfile
	import uvicorn
	import urllib.request
	from fastapi import FastAPI, Response, HTTPException
	from TTS.api import TTS
	from g2p_id import G2p

	# 1. Resolve every asset path at the very top, relative to this file's directory.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))
	CONFIG_PATH = os.path.join(BASE_DIR, "config.json")
	MODEL_PATH = os.path.join(BASE_DIR, "best_model.pth")
	JULIET_WAV = os.path.join(BASE_DIR, "juliet.wav")

	app = FastAPI()

	# Module-level engine handles (multi-engine setup); all start out unset.
	vits_engine = vc_engine = g2p_engine = None

	# Voice bank: speaker name -> path of its reference WAV.
	VOICES = {"juliet": JULIET_WAV}

	def download_model_assets():
	    """Ensure the VITS checkpoint and its config exist next to this file.

	    Each asset that is not already present in ``BASE_DIR`` is downloaded.
	    The download goes to a ``<name>.part`` file first and is renamed into
	    place only after it completes, so an interrupted transfer never leaves
	    a truncated file that later runs would mistake for a valid asset.

	    Returns:
	        None.

	    Raises:
	        Whatever ``urllib.request.urlretrieve`` or the filesystem calls
	        raise (network failures, short reads, permission errors); the
	        partial file is removed before the exception propagates.
	    """
	    print("[*] Verifying native model assets...")
	    assets = {
	        "best_model.pth": "https://huggingface.co/Wikidepia/indonesian-tts/resolve/main/best_model.pth",
	        "config.json": "https://huggingface.co/Wikidepia/indonesian-tts/resolve/main/config.json",
	    }
	    for filename, url in assets.items():
	        file_path = os.path.join(BASE_DIR, filename)
	        if os.path.exists(file_path):
	            continue

	        print(f"[*] Downloading {filename}...")
	        partial_path = file_path + ".part"
	        try:
	            urllib.request.urlretrieve(url, partial_path)
	            # Rename only a fully downloaded file into its final location.
	            os.replace(partial_path, file_path)
	        finally:
	            # After a successful rename this is a no-op; after a failure it
	            # discards the incomplete download.
	            if os.path.exists(partial_path):
	                os.remove(partial_path)

	@app.get("/")
	def root():
	    """Health-check endpoint.

	    Reports a fixed status banner, whether both the VITS and the voice
	    conversion engines are currently loaded, and whether the Juliet
	    reference WAV exists on disk.
	    """
	    if vits_engine is None or vc_engine is None:
	        model_status = "Standby (Cold Start Required)"
	    else:
	        model_status = "Ready"

	    if os.path.exists(JULIET_WAV):
	        file_check = "juliet.wav Found"
	    else:
	        file_check = "juliet.wav MISSING"

	    return {
	        "status": "SharX Hybrid Juliet-ID Engine Online ⚡",
	        "model_status": model_status,
	        "file_check": file_check,
	    }

	def _phonemes_to_text(phonemes):
	    """Collapse G2P output into one space-separated string.

	    A plain string is returned unchanged and a flat list of strings is
	    joined with spaces. A nested list (one inner list of phoneme symbols
	    per word) has each inner list concatenated into a word first.
	    NOTE(review): the nested shape is what g2p_id is believed to return;
	    confirm against the installed version.
	    """
	    if not isinstance(phonemes, (list, tuple)):
	        return phonemes
	    return " ".join(
	        "".join(str(symbol) for symbol in item)
	        if isinstance(item, (list, tuple))
	        else str(item)
	        for item in phonemes
	    )


	def _ensure_engines_loaded():
	    """Load whichever of the VITS, G2P and voice-conversion engines is unset.

	    Engines that are already loaded are kept, so a retry after a partial
	    failure does not reload them. Any exception from downloading or model
	    construction propagates to the caller.
	    """
	    global vits_engine, vc_engine, g2p_engine

	    if vits_engine is not None and g2p_engine is not None and vc_engine is not None:
	        return

	    print("[*] COLD START: Loading Native VITS, G2P, and Voice Conversion...")
	    download_model_assets()
	    os.environ["COQUI_TOS_AGREED"] = "1"

	    # Engine 1: native VITS model, intended for Indonesian pronunciation.
	    if vits_engine is None:
	        vits_engine = TTS(model_path=MODEL_PATH, config_path=CONFIG_PATH, progress_bar=False).to("cpu")
	    if g2p_engine is None:
	        g2p_engine = G2p()

	    # Engine 2: FreeVC, used to transfer the reference speaker's tone.
	    if vc_engine is None:
	        print("[*] Loading Tone Transfer Engine (FreeVC24)...")
	        vc_engine = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cpu")
	    print("[+] All Engines loaded successfully!")


	@app.get("/tts")
	def generate_xtts(text: str, speaker: str = "juliet"):
	    """Synthesize ``text`` and convert it to the requested reference voice.

	    Pipeline: G2P on the text, VITS synthesis to a temporary WAV, then
	    voice conversion of that WAV towards the speaker's reference audio.

	    Args:
	        text: Text to synthesize; must not be blank.
	        speaker: Key into ``VOICES`` selecting the reference WAV.

	    Returns:
	        A ``Response`` carrying the converted audio as ``audio/wav``.

	    Raises:
	        HTTPException: 400 when ``text`` is blank or the speaker's
	            reference file is unknown or missing; 500 when the engines
	            cannot be loaded or inference fails.
	    """
	    # Validate the cheap things first so a bad request never triggers the
	    # expensive cold start.
	    if not text or not text.strip():
	        raise HTTPException(status_code=400, detail="Teks tidak boleh kosong!")

	    ref_audio = VOICES.get(speaker)
	    if not ref_audio or not os.path.exists(ref_audio):
	        raise HTTPException(status_code=400, detail=f"File referensi {speaker} tidak ditemukan!")

	    # Lazy loading of the dual-model setup on first use.
	    try:
	        _ensure_engines_loaded()
	    except Exception as e:
	        print(f"[-] Model Load Error: {e}")
	        raise HTTPException(status_code=500, detail=f"Gagal memuat model: {e}") from e

	    try:
	        # Both intermediate files live in a throwaway directory that is
	        # removed on every exit path, so temp audio cannot pile up on the
	        # container's storage.
	        with tempfile.TemporaryDirectory() as work_dir:
	            base_out_path = os.path.join(work_dir, "base.wav")
	            final_out_path = os.path.join(work_dir, "final.wav")

	            # Stage 1: G2P mapping.
	            phonemes = _phonemes_to_text(g2p_engine(text))
	            print(f"[*] Stage 1 (G2P): {phonemes[:40]}...")

	            # Stage 2: generate the base audio. Prefer the 'gadis' speaker,
	            # otherwise fall back to the first listed speaker.
	            # NOTE(review): `speakers` is presumably None/empty for a
	            # single-speaker checkpoint, in which case no speaker is
	            # passed; confirm against the Coqui TTS API.
	            available_speakers = vits_engine.speakers or []
	            if "gadis" in available_speakers:
	                target_base_speaker = "gadis"
	            elif available_speakers:
	                target_base_speaker = available_speakers[0]
	            else:
	                target_base_speaker = None
	            vits_engine.tts_to_file(
	                text=phonemes,
	                speaker=target_base_speaker,
	                file_path=base_out_path,
	            )
	            print(f"[*] Stage 2 (Base Audio): Created temp native ID audio.")

	            # Stage 3: voice tone conversion towards the reference audio.
	            print(f"[*] Stage 3 (Tone Transfer): Injecting {speaker}.wav identity...")
	            vc_engine.voice_conversion_to_file(
	                source_wav=base_out_path,
	                target_wav=ref_audio,
	                file_path=final_out_path,
	            )

	            # Read the result before the directory is cleaned up.
	            with open(final_out_path, "rb") as f:
	                audio_data = f.read()

	        return Response(content=audio_data, media_type="audio/wav")

	    except Exception as e:
	        print(f"[-] Inference Error: {e}")
	        raise HTTPException(status_code=500, detail=f"Gagal generate audio hybrid: {e}") from e

	# Script entry point: serve the app on all interfaces when run directly.
	if __name__ == "__main__":
	    uvicorn.run(app, port=7860, host="0.0.0.0")