# omon / app.py
# workebridge's picture
# Update app.py
# ceca7a3 verified
import os
import tempfile
import threading
import urllib.request

import uvicorn
from fastapi import FastAPI, Response, HTTPException
from g2p_id import G2p
from TTS.api import TTS
# 1. Define every filesystem path at the very top of the module.
# All assets are expected to sit next to this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
JULIET_WAV, MODEL_PATH, CONFIG_PATH = (
    os.path.join(BASE_DIR, asset_name)
    for asset_name in ("juliet.wav", "best_model.pth", "config.json")
)
# Application object on which the route handlers in this file are registered.
app = FastAPI()

# Global engine handles for the multi-engine pipeline. They start out as None
# and are assigned by the /tts handler the first time it runs.
vits_engine = vc_engine = g2p_engine = None

# Voice bank: maps a speaker name to the reference WAV used for that voice.
VOICES = {"juliet": JULIET_WAV}
def download_model_assets(target_dir=None):
    """Ensure the native VITS checkpoint and its config exist on disk.

    Every asset that is missing from ``target_dir`` is downloaded from the
    URLs listed below. Files that already exist are left untouched, so
    repeated calls are cheap.

    Args:
        target_dir: Directory that should contain the assets. When ``None``
            (the default) the module-level ``BASE_DIR`` is used.

    Raises:
        OSError: If a download or the final rename fails (this includes
            ``urllib.error.URLError``).
    """
    if target_dir is None:
        target_dir = BASE_DIR

    print("[*] Verifying native model assets...")
    assets = {
        "best_model.pth": "https://huggingface.co/Wikidepia/indonesian-tts/resolve/main/best_model.pth",
        "config.json": "https://huggingface.co/Wikidepia/indonesian-tts/resolve/main/config.json",
    }
    for filename, url in assets.items():
        file_path = os.path.join(target_dir, filename)
        if os.path.exists(file_path):
            continue

        print(f"[*] Downloading {filename}...")
        # Download to a sibling ".part" file and rename only on success.
        # Writing straight to file_path would leave a truncated file behind
        # after an interrupted transfer, and the existence check above would
        # then skip the download on every later call.
        part_path = file_path + ".part"
        try:
            urllib.request.urlretrieve(url, part_path)
            os.replace(part_path, file_path)
        except BaseException:
            if os.path.exists(part_path):
                os.remove(part_path)
            raise
@app.get("/")
def root():
    """Report service status, engine readiness and reference-file presence.

    Returns:
        A dict with three string fields: ``status`` (a fixed banner),
        ``model_status`` ("Ready" only when both ``vits_engine`` and
        ``vc_engine`` are set) and ``file_check`` (whether ``JULIET_WAV``
        exists on disk).
    """
    # The service is in standby as long as either engine is still unset.
    if vits_engine is None or vc_engine is None:
        model_status = "Standby (Cold Start Required)"
    else:
        model_status = "Ready"

    if os.path.exists(JULIET_WAV):
        file_check = "juliet.wav Found"
    else:
        file_check = "juliet.wav MISSING"

    return {
        "status": "SharX Hybrid Juliet-ID Engine Online ⚡",
        "model_status": model_status,
        "file_check": file_check,
    }
# Serialises the cold-start load. FastAPI runs plain ``def`` handlers in a
# thread pool, so simultaneous first requests would otherwise each load every
# model.
_ENGINE_LOAD_LOCK = threading.Lock()


def _ensure_engines_loaded():
    """Load the VITS, G2P and voice-conversion engines on first use.

    Raises:
        HTTPException: 500 if any asset download or model load fails.
    """
    global vits_engine, vc_engine, g2p_engine

    with _ENGINE_LOAD_LOCK:
        if vits_engine is not None and vc_engine is not None and g2p_engine is not None:
            return

        print("[*] COLD START: Loading Native VITS, G2P, and Voice Conversion...")
        try:
            download_model_assets()
            os.environ["COQUI_TOS_AGREED"] = "1"
            # Engine 1: native VITS, used for Indonesian pronunciation.
            loaded_vits = TTS(model_path=MODEL_PATH, config_path=CONFIG_PATH, progress_bar=False).to("cpu")
            loaded_g2p = G2p()
            # Engine 2: FreeVC, used to transfer the reference voice's tone.
            print("[*] Loading Tone Transfer Engine (FreeVC24)...")
            loaded_vc = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cpu")
        except Exception as e:
            print(f"[-] Model Load Error: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Gagal memuat model: {str(e)}") from e

        # Publish the engines only once all of them loaded, so a failure
        # half-way through never leaves a partially initialised state behind.
        vits_engine, g2p_engine, vc_engine = loaded_vits, loaded_g2p, loaded_vc
        print("[+] All Engines loaded successfully!")


def _phonemes_to_text(phonemes):
    """Flatten G2P output into the single string handed to the VITS engine.

    A flat list of strings is joined with spaces. Nested lists are also
    accepted: each inner list is concatenated into one token first.
    NOTE(review): g2p_id appears to return one phoneme list per word; confirm
    that concatenating them per word is the input the VITS model expects.
    Any non-list value is returned unchanged.
    """
    if not isinstance(phonemes, list):
        return phonemes
    return " ".join(
        item if isinstance(item, str) else "".join(item)
        for item in phonemes
    )


def _pick_base_speaker(available):
    """Choose the VITS speaker used for the intermediate (pre-conversion) audio.

    Prefers "gadis", falls back to the first listed speaker, and returns
    ``None`` when the model exposes no speaker list (presumably a
    single-speaker model, for which no speaker should be passed).
    """
    if not available:
        return None
    return "gadis" if "gadis" in available else available[0]


@app.get("/tts")
def generate_xtts(text: str, speaker: str = "juliet"):
    """Synthesise ``text`` and convert it to the requested reference voice.

    Pipeline: G2P phonemisation -> native VITS base audio -> FreeVC voice
    conversion towards the speaker's reference WAV.

    Args:
        text: Text to synthesise. Must contain non-whitespace characters.
        speaker: Key into ``VOICES`` selecting the reference WAV.

    Returns:
        A ``Response`` carrying the converted audio as ``audio/wav``.

    Raises:
        HTTPException: 400 for empty text or an unknown/missing reference
            file; 500 if the models cannot be loaded or inference fails.
    """
    # Validate the cheap things first so a bad request never triggers the
    # expensive cold-start model load.
    if not text.strip():
        raise HTTPException(status_code=400, detail="Parameter 'text' tidak boleh kosong!")

    # Check that the reference file is available.
    ref_audio = VOICES.get(speaker)
    if not ref_audio or not os.path.exists(ref_audio):
        raise HTTPException(status_code=400, detail=f"File referensi {speaker} tidak ditemukan!")

    # Lazy loading of the dual-model architecture.
    _ensure_engines_loaded()

    try:
        # Both intermediate files live in one scratch directory that is removed
        # when the block exits, whether inference succeeded or not.
        with tempfile.TemporaryDirectory() as work_dir:
            base_out_path = os.path.join(work_dir, "base.wav")
            final_out_path = os.path.join(work_dir, "final.wav")

            # Stage 1: G2P mapping.
            phonemes = _phonemes_to_text(g2p_engine(text))
            print(f"[*] Stage 1 (G2P): {phonemes[:40]}...")

            # Stage 2: generate the native base audio.
            vits_engine.tts_to_file(
                text=phonemes,
                speaker=_pick_base_speaker(vits_engine.speakers),
                file_path=base_out_path,
            )
            print("[*] Stage 2 (Base Audio): Created temp native ID audio.")

            # Stage 3: voice tone conversion towards the reference speaker.
            print(f"[*] Stage 3 (Tone Transfer): Injecting {speaker}.wav identity...")
            vc_engine.voice_conversion_to_file(
                source_wav=base_out_path,
                target_wav=ref_audio,
                file_path=final_out_path,
            )

            # Read the final output before the scratch directory disappears.
            with open(final_out_path, "rb") as f:
                audio_data = f.read()
    except Exception as e:
        print(f"[-] Inference Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Gagal generate audio hybrid: {str(e)}") from e

    return Response(content=audio_data, media_type="audio/wav")
if __name__ == "__main__":
    # Script entry point: serve the app on every network interface, port 7860.
    listen_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **listen_options)