import gradio as gr import os import torch import commons import utils from models import SynthesizerTrn import numpy as np import json import shutil from huggingface_hub import snapshot_download # --- OPTIONAL ROMAJI CONVERTER --- try: import pykakasi kks = pykakasi.kakasi() def to_romaji(text): if not text: return text text_str = str(text) result = kks.convert(text_str) romaji = "".join([item['hepburn'].capitalize() for item in result]) return romaji if romaji and romaji.lower() != text_str.lower() else text_str except ImportError: def to_romaji(text): return str(text) # --- DOWNLOAD ASSETS --- REPO_ID = "Plana-Archive/Plana-TTS" LOCAL_ROOT = "saved_model" def download_assets(): if not os.path.exists(os.path.join(LOCAL_ROOT, "info.json")): try: snapshot_download(repo_id=REPO_ID, local_dir=".", allow_patterns=["info.json"]) if os.path.exists("info.json"): os.makedirs(LOCAL_ROOT, exist_ok=True) shutil.move("info.json", os.path.join(LOCAL_ROOT, "info.json")) snapshot_download(repo_id=REPO_ID, local_dir=LOCAL_ROOT, allow_patterns=["MOE-TTS/saved_model/*"]) wrong_path = os.path.join(LOCAL_ROOT, "MOE-TTS", "saved_model") if os.path.exists(wrong_path): for item in os.listdir(wrong_path): shutil.move(os.path.join(wrong_path, item), os.path.join(LOCAL_ROOT, item)) shutil.rmtree(os.path.join(LOCAL_ROOT, "MOE-TTS")) except Exception as e: print(f"Download error: {str(e)}") download_assets() # --- MODEL ENGINE (PERBAIKAN KRUSIAL MODEL 19) --- loaded_models = {} def clean_config(conf): """ Mengonversi semua key dalam dictionary menjadi string secara rekursif. Ini wajib untuk model dengan banyak speaker seperti Model 19 agar tidak error 'int'. """ if isinstance(conf, dict): return {str(k): clean_config(v) for k, v in conf.items()} elif isinstance(conf, list): return [clean_config(i) for i in conf] return conf def get_vits_model(m_id): mid = str(m_id) if mid in loaded_models: return loaded_models[mid] try: p = os.path.join(LOCAL_ROOT, mid) config_path = os.path.join(p, "config.json") if not os.path.exists(config_path): return None hps = utils.get_hparams_from_file(config_path) # Ambil parameter model dan bersihkan dari tipe data int pada key if hasattr(hps, 'model'): # Mengakses dictionary internal dari objek HParams model_dict = hps.model.__dict__ if hasattr(hps.model, '__dict__') else dict(hps.model) model_params = clean_config(model_dict) else: model_params = {} net = SynthesizerTrn( len(hps.symbols), hps.data.filter_length // 2 + 1, hps.train.segment_size // hps.data.hop_length, n_speakers=hps.data.n_speakers, **model_params ) utils.load_checkpoint(os.path.join(p, "model.pth"), net, None) net.eval() raw_spks = hps.speakers if hasattr(hps, 'speakers') else [f"Character {i}" for i in range(hps.data.n_speakers)] display_spks = [to_romaji(s) for s in raw_spks] loaded_models[mid] = (hps, net, display_spks, raw_spks) return loaded_models[mid] except Exception as e: print(f"Error loading model {m_id}: {str(e)}") return None def tts_execute(m_id, text, speaker_display, speed): data = get_vits_model(m_id) if not data: return None hps, net, display_spks, raw_spks = data try: sid = display_spks.index(speaker_display) from text import text_to_sequence cleaners = hps.data.text_cleaners if hasattr(hps.data, 'text_cleaners') else ['japanese_cleaners'] seq = text_to_sequence(text, hps.symbols, cleaners) if hps.data.add_blank: seq = commons.intersperse(seq, 0) x = torch.LongTensor(seq).unsqueeze(0) x_len = torch.LongTensor([len(seq)]) with torch.no_grad(): audio = net.infer(x, x_len, sid=torch.LongTensor([sid]), noise_scale=0.667, noise_scale_w=0.8, length_scale=1.0/speed)[0][0,0].data.cpu().float().numpy() return (hps.data.sampling_rate, (audio / np.abs(audio).max() * 32767).astype(np.int16)) except Exception as e: print(f"TTS error: {str(e)}") return None # --- UI DESIGN (UKURAN TETAP SESUAI ASLI) --- css = """ .gradio-container { max-width: 850px !important; margin: 0 auto !important; padding: 10px !important; } .header-box { background: white; border-radius: 15px; padding: 25px !important; margin-bottom: 20px !important; border-top: 6px solid #5f6caf; text-align: center; box-shadow: 0 2px 10px rgba(0,0,0,0.05); } .header-box h1 { font-size: 24px !important; margin: 0; } .tabs-wrapper { border: 1px dashed #cbd5e0 !important; border-radius: 15px !important; padding: 15px !important; background: white; margin-bottom: 20px; } .content-area { background: white; border-radius: 15px !important; padding: 20px !important; border: 1px solid #eee !important; width: 100% !important; } .model-title { font-size: 18px !important; font-weight: bold; margin-bottom: 10px; color: #333; } .footer-box { background: white; border-radius: 12px; padding: 20px !important; margin-top: 20px !important; text-align: center !important; border: 1px solid #eee; } footer { display: none !important; } """ with gr.Blocks(css=css, title="MOE-TTS", theme=gr.themes.Soft()) as demo: with gr.Column(elem_classes="header-box"): gr.Markdown("# Library Anime TTS\n### LIBRARY ANIME PREMIUM") info_path = os.path.join(LOCAL_ROOT, "info.json") if os.path.exists(info_path): with open(info_path, "r", encoding="utf-8") as f: all_info = json.load(f) with gr.Column(elem_classes="tabs-wrapper"): sorted_keys = sorted(all_info.keys(), key=int) with gr.Tabs(): for m_id in sorted_keys: info = all_info[m_id] m_path = os.path.join(LOCAL_ROOT, str(m_id)) if not os.path.exists(m_path): continue with gr.Tab(f"Model {m_id}"): m_res = get_vits_model(m_id) is_ok = m_res is not None spks = m_res[2] if is_ok else ["Error Loading Model"] COVER_FILE = None for ext in ['jpg', 'png', 'jpeg', 'webp']: tmp = os.path.join(m_path, f"cover.{ext}") if os.path.exists(tmp): COVER_FILE = tmp; break with gr.Column(elem_classes="content-area"): with gr.Row(): with gr.Column(scale=1): if COVER_FILE: gr.Image(COVER_FILE, show_label=False) t_romaji = to_romaji(info.get('title', 'Model')) gr.Markdown(f"