"""Gradio front-end for a collection of VITS text-to-speech models.

Model assets are fetched from the Hugging Face Hub into ``saved_model/<id>/``
and one tab per entry of ``CLEAN_INFO`` is built at import time.
"""
import gradio as gr
import os
import torch
import commons
import utils
from models import SynthesizerTrn
import numpy as np
import json
import shutil
import logging
import random
import re
from huggingface_hub import snapshot_download

# --- 1. SETUP LOGGING ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- 2. ROMAJI CONVERTER ---
# pykakasi is optional: when it cannot be imported or initialised, names are
# shown unchanged instead of being romanised.
try:
    import pykakasi

    kks = pykakasi.kakasi()

    def to_romaji(text):
        """Return *text* converted to capitalised Hepburn romaji.

        Falsy input and the literal string ``"None"`` map to ``""``. If the
        conversion itself fails, the input is returned as a plain string.
        """
        if not text or text == "None":
            return ""
        try:
            result = kks.convert(str(text))
            return "".join(item['hepburn'].capitalize() for item in result)
        except Exception:
            # Best effort: an unconvertible name is still usable as a label.
            return str(text)
except Exception:
    def to_romaji(text):
        """Fallback used when pykakasi is unavailable: return ``str(text)``."""
        return str(text)

# --- 3. CLEAN HARDCODED INFO ---
# Keys are model ids (sub-directory names under LOCAL_ROOT).
CLEAN_INFO = {
    "0": {"title": "Sanoba Witch & Senren Banka", "example": "こんにちは。", "type": "vits"},
    "1": {"title": "Hamidashi Creative", "example": "こんにちは。", "type": "vits"},
    "2": {"title": "Cafe Stella & Shinigami no Chou", "example": "こんにちは。", "type": "vits"},
    "3": {"title": "Yosuga no Sora", "example": "こんにちは。", "type": "vits"},
    "4": {"title": "Bishoujo Mangekyou", "example": "こんにちは。", "type": "vits"},
    "5": {"title": "Nene & Nanami Pack (Multi)", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "6": {"title": "The Fox Waiting for You", "example": "안녕하세요.", "type": "vits"},
    "7": {"title": "Galgame Characters Pack (13)", "example": "こんにちは。", "type": "vits"},
    "8": {"title": "Zero no Tsukaima", "example": "こんにちは。", "type": "vits"},
    "9": {"title": "Zero no Tsukaima (VC Mode)", "example": "", "type": "soft-vits-vc"},
    "10": {"title": "Toaru Majutsu no Index (VC)", "example": "", "type": "soft-vits-vc"},
    "11": {"title": "Shiki Natsume (VC Mode)", "example": "", "type": "soft-vits-vc"},
    "12": {"title": "DRACU-RIOT!", "example": "こんにちは。", "type": "vits"},
    "13": {"title": "To LOVE-Ru Series", "example": "こんにちは。", "type": "vits"},
    "14": {"title": "CJKS Multi-Language", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "15": {"title": "Voistock Mega Pack (2891 Chars)", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "16": {"title": "Shanghainese Dialect", "example": "侬好!", "type": "vits"},
    "17": {"title": "Chinese Dialects Pack", "example": "[SH]侬好![SH]", "type": "vits"},
    "18": {"title": "Umamusume: Pretty Derby", "example": "こんにちは。", "type": "vits"},
    "19": {"title": "Princess Connect! Re:Dive", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "20": {"title": "Magia Record (Madoka Magica)", "example": "こんにちは。", "type": "vits"},
}

# --- 4. DOWNLOAD ASSETS ---
REPO_ID = "Plana-Archive/Plana-TTS"
LOCAL_ROOT = "saved_model"


def download_assets():
    """Populate ``LOCAL_ROOT`` from the Hub unless model ``0`` already exists.

    The snapshot is downloaded into ``temp_dir`` and its
    ``MOE-TTS/saved_model`` sub-tree is copied into ``LOCAL_ROOT``. Failures
    are logged rather than raised so the UI can still start.
    """
    os.makedirs(LOCAL_ROOT, exist_ok=True)
    if os.path.exists(os.path.join(LOCAL_ROOT, "0")):
        return
    try:
        logger.info("Downloading Assets...")
        snapshot_download(repo_id=REPO_ID, local_dir="temp_dir",
                          allow_patterns=["MOE-TTS/saved_model/*"])
        src_path = os.path.join("temp_dir", "MOE-TTS", "saved_model")
        if os.path.exists(src_path):
            shutil.copytree(src_path, LOCAL_ROOT, dirs_exist_ok=True)
    except Exception as e:
        logger.error("Download error: %s", e)
    finally:
        # Remove the scratch directory on failure too, so a half-finished
        # download does not linger on disk.
        shutil.rmtree("temp_dir", ignore_errors=True)


download_assets()

# --- 5. ENGINE LOAD MODEL ---
# Cache: model id -> (hps, net, display_spks, original_spks).
loaded_models = {}
# model id -> model speaker id for each entry of display_spks. Needed because
# filtered-out speakers would otherwise shift the list position away from the
# real speaker id.
speaker_id_maps = {}

# Two-letter language markers such as "[JA]" that are removed from input text.
LANG_TAG_RE = re.compile(r'\[[A-Z]{2}\]')


def clean_config(conf):
    """Return a copy of *conf* with every dict key converted to ``str``.

    Dicts and lists are processed recursively; other values are returned
    unchanged.
    """
    if isinstance(conf, dict):
        return {str(k): clean_config(v) for k, v in conf.items()}
    if isinstance(conf, list):
        return [clean_config(i) for i in conf]
    return conf


def get_vits_model(m_id):
    """Load (or fetch from cache) the model stored under ``LOCAL_ROOT/<m_id>``.

    Returns ``(hps, net, display_spks, original_spks)`` where ``display_spks``
    are romanised speaker names and ``original_spks`` the matching raw names.
    Returns ``None`` when ``config.json`` is missing or loading fails.
    """
    mid = str(m_id)
    if mid in loaded_models:
        return loaded_models[mid]
    try:
        model_dir = os.path.join(LOCAL_ROOT, mid)
        cfg_p = os.path.join(model_dir, "config.json")
        if not os.path.exists(cfg_p):
            return None
        hps = utils.get_hparams_from_file(cfg_p)
        m_params = clean_config(
            hps.model.__dict__ if hasattr(hps.model, '__dict__') else dict(hps.model)
        )
        net = SynthesizerTrn(
            len(hps.symbols),
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            n_speakers=hps.data.n_speakers,
            **m_params,
        )
        utils.load_checkpoint(os.path.join(model_dir, "model.pth"), net, None)
        net.eval()

        raw_spks = (hps.speakers if hasattr(hps, 'speakers')
                    else [f"Character {i}" for i in range(hps.data.n_speakers)])
        display_spks = []
        original_spks = []
        kept_ids = []
        # NOTE(review): assumes the position of a name in hps.speakers is its
        # speaker id -- confirm against the model configs.
        for spk_id, s in enumerate(raw_spks):
            romaji_name = to_romaji(s)
            if romaji_name and romaji_name.lower() != "none":
                display_spks.append(romaji_name)
                original_spks.append(s)
                kept_ids.append(spk_id)

        speaker_id_maps[mid] = kept_ids
        loaded_models[mid] = (hps, net, display_spks, original_spks)
        return loaded_models[mid]
    except Exception as e:
        logger.error("Load Error %s: %s", mid, e)
        return None


def tts_execute(m_id, text, speaker_romaji, speed):
    """Synthesize *text* with the chosen speaker of model *m_id*.

    Returns ``(status_message, audio)`` where ``audio`` is
    ``(sampling_rate, int16 ndarray)`` on success and ``None`` on failure.
    With no speaker selected, the first listed speaker is used.
    """
    data = get_vits_model(m_id)
    if not data:
        return "❌ Model Loading...", None
    hps, net, display_spks, _ = data
    if not speaker_romaji:
        if not display_spks:
            return "❌ No Speaker Selected", None
        speaker_romaji = display_spks[0]
    try:
        position = display_spks.index(speaker_romaji)
        # Translate the list position back to the model's own speaker id;
        # they differ whenever "None"/empty speakers were filtered out.
        kept_ids = speaker_id_maps.get(str(m_id))
        sid = kept_ids[position] if kept_ids else position

        from text import text_to_sequence
        clean_text = LANG_TAG_RE.sub('', text)
        cleaners = (hps.data.text_cleaners if hasattr(hps.data, 'text_cleaners')
                    else ['japanese_cleaners'])
        seq = text_to_sequence(clean_text, hps.symbols, cleaners)
        if hps.data.add_blank:
            seq = commons.intersperse(seq, 0)
        with torch.no_grad():
            audio = net.infer(
                torch.LongTensor(seq).unsqueeze(0),
                torch.LongTensor([len(seq)]),
                sid=torch.LongTensor([sid]),
                noise_scale=0.667,
                noise_scale_w=0.8,
                length_scale=1.0 / speed,
            )[0][0, 0].data.cpu().float().numpy()
        # Clip before scaling: samples outside [-1, 1] would otherwise wrap
        # around when cast to int16.
        pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
        return "✅ Done!", (hps.data.sampling_rate, pcm)
    except Exception as e:
        # The status textbox is hidden in the UI, so log the failure as well.
        logger.exception("TTS error for model %s", m_id)
        return f"Error: {e}", None


def get_random_jp():
    """Return one of a few fixed Japanese sample sentences at random."""
    return random.choice(["こんにちは!", "お元気ですか?", "先生、お疲れ様です!", "大好きだよ!", "また明日ね。"])


def get_char_info_html(m_id):
    """Return a listing of the original speaker names of model *m_id*.

    Returns ``""`` when the model cannot be loaded.
    """
    data = get_vits_model(m_id)
    if not data:
        return ""
    _, _, _, original_names = data
    # NOTE(review): the literals below contain no tags; the HTML markup looks
    # like it was lost from this file -- restore it from the upstream source.
    parts = ["\n"]
    parts.extend(
        f"\n{name}\nCharacter {number}\n"
        for number, name in enumerate(original_names, start=1)
    )
    parts.append("\n")
    return "".join(parts)


# --- 6. UI STYLE ---
css = """
:root { --primary-600: #1299ff !important; --accent-600: #1299ff !important; }
.gradio-container, .gradio-container * { --loader-color: #A2D2FF !important; }
.loading { border-top-color: #A2D2FF !important; }
svg.loading { fill: #A2D2FF !important; }
/* Judul Header */
.ba-header-container { border: 1.5px solid #e1e8f0; border-radius: 12px; padding: 20px 10px; margin-bottom: 12px; background: white; text-align: center; }
.ba-header-container h1 { color: #1299ff !important; font-weight: 700 !important; font-size: 42px !important; margin: 0; line-height: 1.1; }
.ba-header-container p { color: #8a99af; font-size: 11px; font-weight: 800; letter-spacing: 2px; margin: 8px 0 0 0; text-transform: uppercase; }
/* System Status Box */
.status-container { border: 1.5px solid #e1e8f0; border-radius: 12px; padding: 15px 22px; margin-bottom: 20px; background: white; }
.status-title { color: #1299ff !important; /* Biru sesuai permintaan */ font-weight: 800; font-size: 16px; margin-bottom: 8px; }
.status-item { display: flex; align-items: center; gap: 10px; font-size: 14px; font-weight: 700; margin-bottom: 4px; }
.text-dark-gray { color: #4a5568 !important; /* Abu-abu Hitam */ }
.text-green-bold { color: #28a745 !important; /* Hijau Tebal */ font-weight: 900 !important; text-shadow: none !important; /* Tanpa bayangan */ }
.text-blue-status { color: #1299ff !important; /* Biru */ }
.status-dot { width: 8px; height: 8px; background-color: #1299ff; border-radius: 50%; }
.pulse-dot { width: 8px; height: 8px; background-color: #1299ff; border-radius: 50%; position: relative; }
.pulse-dot::after { content: ""; position: absolute; width: 100%; height: 100%; background-color: #1299ff; border-radius: 50%; animation: pulse 2.5s infinite; }
@keyframes pulse { 0% { transform: scale(1); opacity: 0.8; } 100% { transform: scale(3); opacity: 0; } }
.slim-card { max-width: 480px; margin: 0 auto; background: transparent; padding: 10px; }
.tabs > .tab-nav { display: flex !important; overflow-x: auto !important; white-space: nowrap !important; flex-wrap: nowrap !important; }
.tabs > .tab-nav button { flex: 0 0 auto !important; }
.scroll-box { height: 200px; overflow-y: auto; border: 1px solid #f0f4f8; border-radius: 12px; padding: 10px; background: #fafbfc; margin-bottom: 10px; }
.char-btn { background: white !important; border: 1px solid #e2e8f0 !important; border-left: 5px solid #1299ff !important; text-align: left !important; padding: 8px !important; font-size: 12px !important; margin-bottom: 4px !important; width: 100%; color: #4a5568 !important; }
.char-btn:hover { background: #f0f7ff !important; border-color: #1299ff !important; }
.warning-card { background: #fff9f0; border: 2px dashed #f5a623; border-radius: 10px; padding: 12px; margin-bottom: 15px; text-align: center; }
.warning-title { color: #f5a623; font-weight: 800; font-size: 13px; margin-bottom: 4px; display: flex; align-items: center; justify-content: center; gap: 5px; }
.warning-text { color: #855d1a; font-size: 11px; line-height: 1.5; font-weight: 600; }
.jp-btn { background: #f8fafc !important; border: 1px solid #cbd5e1 !important; color: #475569 !important; font-weight: 700 !important; border-radius: 10px !important; margin-bottom: 10px; font-size: 12px !important; width: 100%; }
.gen-btn { background: #1299ff !important; color: white !important; font-weight: 700 !important; border-radius: 12px !important; height: 45px !important; width: 100%; border: none !important; cursor: pointer; transition: 0.3s; }
.info-header-custom { background: #1299ff !important; color: white !important; border: none !important; border-radius: 8px 8px 0 0 !important; padding: 12px 15px !important; width: 100% !important; cursor: pointer; display: flex !important; justify-content: center !important; align-items: center !important; font-weight: 800 !important; font-size: 14px !important; margin-top: 15px; gap: 15px; }
.credit-footer { margin-top: 25px; padding: 15px; background: white; border-radius: 12px; text-align: center; border: 1px solid #eef2f6; border-bottom: 4px solid #1299ff; color: #94a3b8; font-weight: 700; font-size: 12px; letter-spacing: 2px; }
"""

# NOTE(review): the HTML snippets passed to gr.HTML below contain text only.
# The CSS above styles classes (e.g. .ba-header-container, .warning-card) that
# none of them reference, so their markup was presumably stripped -- restore
# it from the upstream source.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_classes="slim-card"):
        # Header Blue Archive
        gr.HTML(
            "\n"
            "\n"
            "Library Anime\n"
            "\n"
            "🍂 Style-Bert-VITS2 🍂\n"
            "\n"
            "System Status\n"
            "Model :  LOADED ✅\n"
            "System :  Online\n"
        )
        with gr.Tabs(elem_classes="tabs"):
            # One tab per model, ordered numerically by id.
            for m_id in sorted(CLEAN_INFO, key=int):
                with gr.Tab(f"Model {m_id}"):
                    gr.Markdown(f"### 📂 {CLEAN_INFO[m_id]['title']}")
                    m_data = get_vits_model(m_id)
                    chars = m_data[2] if m_data else []

                    # Show the first cover image found, if any.
                    m_p = os.path.join(LOCAL_ROOT, str(m_id))
                    cov = next(
                        (os.path.join(m_p, f"cover.{e}")
                         for e in ['png', 'jpg', 'jpeg', 'webp']
                         if os.path.exists(os.path.join(m_p, f"cover.{e}"))),
                        None,
                    )
                    if cov:
                        gr.Image(cov, show_label=False, interactive=False, height=140)

                    sel_name = gr.State("")
                    char_display = gr.Markdown("📍 *Silakan pilih karakter...*")
                    gr.HTML("\n\nCHARACTER LIST (ROMAJI)\n\n")
                    with gr.Column(elem_classes="scroll-box"):
                        for name in chars:
                            btn = gr.Button(f"👤 {name}", elem_classes="char-btn")
                            # n=name binds the current name; a plain closure
                            # would see only the last loop value.
                            btn.click(fn=lambda n=name: (n, f"📍 Selected: **{n}**"),
                                      outputs=[sel_name, char_display])

                    gr.HTML(
                        "\n"
                        "🔖 PERINGATAN MINNA 🔖\n"
                        "Model 19 is not working. Cara pakai VITS ini klik aja character lalu masukkan input text dan Generate Voice! Done ✨\n"
                    )
                    ex_text = LANG_TAG_RE.sub('', CLEAN_INFO[m_id].get("example", "こんにちは。"))
                    txt_in = gr.TextArea(label="Input Text", value=ex_text, lines=3)
                    gr.Button("🎲 INPUTS RANDOM TEXT 🎲", elem_classes="jp-btn").click(
                        get_random_jp, outputs=[txt_in])
                    spd = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Speed Audio")
                    btn_gen = gr.Button("🎐 GENERATE VOICE 🎐", elem_classes="gen-btn")
                    aud_out = gr.Audio(label="Voice Output")
                    gr.HTML(" ")
                    gr.HTML(get_char_info_html(m_id))
                    status_log = gr.Textbox(visible=False)
                    btn_gen.click(fn=tts_execute,
                                  inputs=[gr.State(m_id), txt_in, sel_name, spd],
                                  outputs=[status_log, aud_out])
        gr.HTML("")

if __name__ == "__main__":
    demo.launch()