Spaces:
Build error
Build error
| import os | |
| import tempfile | |
| import uvicorn | |
| import urllib.request | |
| from fastapi import FastAPI, Response, HTTPException | |
| from TTS.api import TTS | |
| from g2p_id import G2p | |
# 1. Resolve every asset path up front, relative to the directory that
#    contains this file, so lookups do not depend on the working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
JULIET_WAV, MODEL_PATH, CONFIG_PATH = (
    os.path.join(BASE_DIR, asset_name)
    for asset_name in ("juliet.wav", "best_model.pth", "config.json")
)
app = FastAPI()

# Engine handles shared by all requests. They start out as None and are
# filled in on the first synthesis request (see generate_xtts).
vits_engine = vc_engine = g2p_engine = None

# Voice bank: maps a speaker name to the reference WAV used for tone transfer.
VOICES = dict(juliet=JULIET_WAV)
def download_model_assets(base_dir=None, assets=None):
    """Download any missing model assets into ``base_dir``.

    Each asset is first written to a temporary ``*.part`` file in the target
    directory and only renamed onto its final name once the download has
    finished. An interrupted download therefore never leaves a truncated file
    that a later call would mistake for a valid asset.

    Args:
        base_dir: Directory that should contain the assets. Defaults to the
            module-level ``BASE_DIR``.
        assets: Mapping of file name -> download URL. Defaults to the
            Indonesian TTS checkpoint and its config on Hugging Face.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on a failed download
        (e.g. ``urllib.error.URLError``), or ``OSError`` on file-system errors.
    """
    target_dir = BASE_DIR if base_dir is None else base_dir
    if assets is None:
        assets = {
            "best_model.pth": "https://huggingface.co/Wikidepia/indonesian-tts/resolve/main/best_model.pth",
            "config.json": "https://huggingface.co/Wikidepia/indonesian-tts/resolve/main/config.json",
        }

    print("[*] Verifying native model assets...")
    for filename, url in assets.items():
        file_path = os.path.join(target_dir, filename)
        if os.path.exists(file_path):
            continue

        print(f"[*] Downloading {filename}...")
        # The temp file lives in the target directory so the final rename
        # stays on one file system and os.replace() can be atomic.
        fd, partial_path = tempfile.mkstemp(
            dir=target_dir, prefix=f"{filename}.", suffix=".part"
        )
        os.close(fd)
        try:
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, file_path)
        finally:
            # After a successful replace the partial file no longer exists;
            # after a failure this removes the incomplete download.
            if os.path.exists(partial_path):
                os.remove(partial_path)
# NOTE(review): no route decorator is visible on this function in this view;
# confirm that it is registered on `app`.
def root():
    """Return a status payload describing the service.

    The payload reports a fixed status banner, whether both the VITS and the
    voice-conversion engines are already loaded, and whether the Juliet
    reference WAV exists on disk.
    """
    engines_ready = not (vits_engine is None or vc_engine is None)
    if engines_ready:
        model_status = "Ready"
    else:
        model_status = "Standby (Cold Start Required)"

    if os.path.exists(JULIET_WAV):
        file_check = "juliet.wav Found"
    else:
        file_check = "juliet.wav MISSING"

    return {
        "status": "SharX Hybrid Juliet-ID Engine Online ⚡",
        "model_status": model_status,
        "file_check": file_check,
    }
def _load_engines():
    """Load the VITS, G2P and FreeVC engines and publish them as module globals.

    The globals are only assigned after every engine has loaded, so a failed
    cold start never leaves a half-initialised set of engines behind.

    Raises:
        HTTPException: 500 if downloading the assets or loading any engine fails.
    """
    global vits_engine, vc_engine, g2p_engine

    print("[*] COLD START: Loading Native VITS, G2P, and Voice Conversion...")
    try:
        download_model_assets()
        os.environ["COQUI_TOS_AGREED"] = "1"
        # Engine 1: native VITS model for Indonesian pronunciation.
        native_tts = TTS(
            model_path=MODEL_PATH, config_path=CONFIG_PATH, progress_bar=False
        ).to("cpu")
        grapheme_to_phoneme = G2p()
        # Engine 2: FreeVC for zero-shot tone transfer to the reference voice.
        print("[*] Loading Tone Transfer Engine (FreeVC24)...")
        tone_transfer = TTS(
            model_name="voice_conversion_models/multilingual/vctk/freevc24",
            progress_bar=False,
        ).to("cpu")
    except Exception as e:
        print(f"[-] Model Load Error: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Gagal memuat model: {str(e)}"
        ) from e

    vits_engine, g2p_engine, vc_engine = native_tts, grapheme_to_phoneme, tone_transfer
    print("[+] All Engines loaded successfully!")


def _phonemes_to_text(phonemes):
    """Flatten G2P output into a single string for the TTS engine.

    Accepts a plain string (returned unchanged), a flat list of strings
    (joined with spaces), or a list of per-word phoneme lists. In the nested
    case each word's phonemes are concatenated and words are separated by a
    space.
    NOTE(review): the nested shape is what the g2p_id README shows; confirm
    that this word-level spacing is what the VITS checkpoint expects.
    """
    if isinstance(phonemes, str):
        return phonemes
    return " ".join(
        word if isinstance(word, str) else "".join(word) for word in phonemes
    )


# NOTE(review): no route decorator is visible on this function in this view;
# confirm that it is registered on `app`.
def generate_xtts(text: str, speaker: str = "juliet"):
    """Synthesize ``text`` and convert it to the requested reference voice.

    Pipeline: G2P -> native VITS base audio -> FreeVC tone transfer onto the
    reference WAV registered for ``speaker`` in ``VOICES``.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        speaker: Key into ``VOICES`` selecting the reference WAV.

    Returns:
        A ``Response`` whose body is the converted WAV (``audio/wav``).

    Raises:
        HTTPException: 400 if ``text`` is empty or the speaker's reference
            file is unknown or missing; 500 if model loading or inference fails.
    """
    # Validate the request before the expensive cold start so that a bad
    # request cannot trigger a model load.
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks tidak boleh kosong!")

    # Check that the reference file is available.
    ref_audio = VOICES.get(speaker)
    if not ref_audio or not os.path.exists(ref_audio):
        raise HTTPException(
            status_code=400, detail=f"File referensi {speaker} tidak ditemukan!"
        )

    # Lazy loading of the dual-model architecture.
    if vits_engine is None or vc_engine is None or g2p_engine is None:
        _load_engines()

    try:
        # Both intermediate files live in one temporary directory that is
        # removed on exit, success or failure, so nothing accumulates on disk.
        with tempfile.TemporaryDirectory() as work_dir:
            base_out_path = os.path.join(work_dir, "base.wav")
            final_out_path = os.path.join(work_dir, "final.wav")

            # Stage 1: G2P mapping.
            phonemes = _phonemes_to_text(g2p_engine(text))
            print(f"[*] Stage 1 (G2P): {phonemes[:40]}...")

            # Stage 2: generate the native base audio. Prefer the 'gadis'
            # speaker, fall back to the first available one, and pass no
            # speaker at all when the model exposes none.
            available_speakers = vits_engine.speakers or []
            if "gadis" in available_speakers:
                target_base_speaker = "gadis"
            elif available_speakers:
                target_base_speaker = available_speakers[0]
            else:
                target_base_speaker = None
            vits_engine.tts_to_file(
                text=phonemes,
                speaker=target_base_speaker,
                file_path=base_out_path,
            )
            print("[*] Stage 2 (Base Audio): Created temp native ID audio.")

            # Stage 3: voice tone conversion onto the reference speaker.
            print(f"[*] Stage 3 (Tone Transfer): Injecting {speaker}.wav identity...")
            vc_engine.voice_conversion_to_file(
                source_wav=base_out_path,
                target_wav=ref_audio,
                file_path=final_out_path,
            )

            # Read the final output before the directory is cleaned up.
            with open(final_out_path, "rb") as f:
                audio_data = f.read()
    except Exception as e:
        print(f"[-] Inference Error: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Gagal generate audio hybrid: {str(e)}"
        ) from e

    return Response(content=audio_data, media_type="audio/wav")
if __name__ == "__main__":
    # When run as a script, serve the app on every interface, port 7860.
    bind_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind_options)