# tts / app.py
# Plana-Archive's picture
# Update app.py
# aa17c31 verified
import gradio as gr
import os
import torch
import commons
import utils
from models import SynthesizerTrn
import numpy as np
import json
import shutil
import logging
import random
import re
from huggingface_hub import snapshot_download
# --- 1. SETUP LOGGING ---
# Configure root logging once at import time, then create the module logger
# used by the download and model-loading code in this file.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# --- 2. ROMAJI CONVERTER ---
# pykakasi is optional. When it cannot be imported or initialised, `kks` is
# None and to_romaji() passes names through instead of romanising them.
try:
    import pykakasi
    kks = pykakasi.kakasi()
except Exception as exc:  # ImportError, or a failure while building the converter
    kks = None
    logging.getLogger(__name__).warning(
        "pykakasi unavailable (%s); speaker names will not be romanised", exc
    )


def to_romaji(text):
    """Return *text* romanised, with every converted item capitalised.

    Falsy input and the literal string "None" yield "" (with or without
    pykakasi, so callers can filter on the result consistently). When
    pykakasi is unavailable, or the conversion itself fails, the input is
    returned as ``str(text)``.
    """
    if not text or text == "None":
        return ""
    if kks is None:
        return str(text)
    try:
        return "".join(item['hepburn'].capitalize() for item in kks.convert(str(text)))
    except Exception:
        # Best effort: an unconvertible name is shown as-is rather than dropped.
        return str(text)
# --- 3. CLEAN HARDCODED INFO ---
# Per-model metadata keyed by model id (as a string). The id doubles as the
# sub-folder name under LOCAL_ROOT and as the tab order in the UI.
#   title   - heading shown in the model's tab
#   example - default text for the input box ("" for voice-conversion models)
#   type    - model family; not read by the code in this file
# The example strings were stored as mis-decoded UTF-8 (mojibake); they are
# restored here to the Japanese / Korean / Chinese text they encode.
CLEAN_INFO = {
    "0": {"title": "Sanoba Witch & Senren Banka", "example": "こんにちは。", "type": "vits"},
    "1": {"title": "Hamidashi Creative", "example": "こんにちは。", "type": "vits"},
    "2": {"title": "Cafe Stella & Shinigami no Chou", "example": "こんにちは。", "type": "vits"},
    "3": {"title": "Yosuga no Sora", "example": "こんにちは。", "type": "vits"},
    "4": {"title": "Bishoujo Mangekyou", "example": "こんにちは。", "type": "vits"},
    "5": {"title": "Nene & Nanami Pack (Multi)", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "6": {"title": "The Fox Waiting for You", "example": "안녕하세요.", "type": "vits"},
    "7": {"title": "Galgame Characters Pack (13)", "example": "こんにちは。", "type": "vits"},
    "8": {"title": "Zero no Tsukaima", "example": "こんにちは。", "type": "vits"},
    "9": {"title": "Zero no Tsukaima (VC Mode)", "example": "", "type": "soft-vits-vc"},
    "10": {"title": "Toaru Majutsu no Index (VC)", "example": "", "type": "soft-vits-vc"},
    "11": {"title": "Shiki Natsume (VC Mode)", "example": "", "type": "soft-vits-vc"},
    "12": {"title": "DRACU-RIOT!", "example": "こんにちは。", "type": "vits"},
    "13": {"title": "To LOVE-Ru Series", "example": "こんにちは。", "type": "vits"},
    "14": {"title": "CJKS Multi-Language", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "15": {"title": "Voistock Mega Pack (2891 Chars)", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "16": {"title": "Shanghainese Dialect", "example": "侬好!", "type": "vits"},
    "17": {"title": "Chinese Dialects Pack", "example": "[SH]侬好![SH]", "type": "vits"},
    "18": {"title": "Umamusume: Pretty Derby", "example": "こんにちは。", "type": "vits"},
    "19": {"title": "Princess Connect! Re:Dive", "example": "[JA]こんにちは。[JA]", "type": "vits"},
    "20": {"title": "Magia Record (Madoka Magica)", "example": "こんにちは。", "type": "vits"},
}
# --- 4. DOWNLOAD ASSETS ---
# Hugging Face repo id passed to snapshot_download() by download_assets().
REPO_ID = "Plana-Archive/Plana-TTS"
# Local directory that holds one sub-folder per model id ("0", "1", ...).
LOCAL_ROOT = "saved_model"
def download_assets():
    """Populate ``LOCAL_ROOT`` with the model folders from ``REPO_ID``.

    ``LOCAL_ROOT`` is always created. When ``LOCAL_ROOT/0`` already exists the
    assets are considered present and nothing is downloaded. Otherwise the
    ``MOE-TTS/saved_model`` tree of the repo is fetched into a scratch
    directory, copied into ``LOCAL_ROOT``, and the scratch directory removed.

    Failures are logged (with traceback) and never raised, so importing the
    module still succeeds when the download does not.
    """
    os.makedirs(LOCAL_ROOT, exist_ok=True)
    if os.path.exists(os.path.join(LOCAL_ROOT, "0")):
        return

    scratch_dir = "temp_dir"
    try:
        logger.info("Downloading Assets...")
        snapshot_download(repo_id=REPO_ID, local_dir=scratch_dir, allow_patterns=["MOE-TTS/saved_model/*"])
        src_path = os.path.join(scratch_dir, "MOE-TTS", "saved_model")
        if os.path.exists(src_path):
            shutil.copytree(src_path, LOCAL_ROOT, dirs_exist_ok=True)
        else:
            # Previously a silent no-op; say why no model will be available.
            logger.error("Download finished but %s is missing; no models were installed", src_path)
        # A cleanup hiccup must not be reported as a failed download.
        shutil.rmtree(scratch_dir, ignore_errors=True)
    except Exception:
        logger.exception("Download error")
# Runs at import time, before the UI below asks for any model.
download_assets()
# --- 5. ENGINE LOAD MODEL ---
# Cache filled by get_vits_model(): model id (str) -> (hps, net, display_spks, original_spks).
loaded_models = {}
def clean_config(conf):
    """Return a copy of *conf* in which every dict key is a ``str``.

    Dicts, lists and tuples are rebuilt recursively (tuples stay tuples);
    any other value is returned unchanged.
    """
    if isinstance(conf, dict):
        return {str(key): clean_config(value) for key, value in conf.items()}
    if isinstance(conf, list):
        return [clean_config(item) for item in conf]
    if isinstance(conf, tuple):
        # Dicts nested inside tuples need the same key normalisation.
        return tuple(clean_config(item) for item in conf)
    return conf
def get_vits_model(m_id):
    """Load (once) and return the model stored under ``LOCAL_ROOT/<m_id>``.

    Args:
        m_id: Model id; converted to ``str`` and used as folder name and cache key.

    Returns:
        ``(hps, net, display_spks, original_spks)`` on success, else ``None``.
        ``display_spks`` holds the romanised speaker names and
        ``original_spks`` the matching raw names. Speakers whose romanised
        name is empty or "none" are dropped from both lists, so a position in
        these lists is not necessarily the speaker's index in ``hps.speakers``.
        ``None`` is returned when ``config.json`` is missing or loading
        raises; failures are logged and not cached, so a later call retries.
    """
    mid = str(m_id)
    cached = loaded_models.get(mid)
    if cached is not None:
        return cached

    model_dir = os.path.join(LOCAL_ROOT, mid)
    cfg_p = os.path.join(model_dir, "config.json")
    if not os.path.exists(cfg_p):
        logger.warning("Model %s: %s not found", mid, cfg_p)
        return None

    try:
        hps = utils.get_hparams_from_file(cfg_p)
        # Model hyper-parameters are splatted as keyword arguments, so their keys must be strings.
        m_params = clean_config(hps.model.__dict__ if hasattr(hps.model, '__dict__') else dict(hps.model))
        net = SynthesizerTrn(
            len(hps.symbols),
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            n_speakers=hps.data.n_speakers,
            **m_params,
        )
        utils.load_checkpoint(os.path.join(model_dir, "model.pth"), net, None)
        net.eval()

        # Configs without a speaker table get generated placeholder names.
        raw_spks = hps.speakers if hasattr(hps, 'speakers') else [f"Character {i}" for i in range(hps.data.n_speakers)]
        display_spks = []
        original_spks = []
        for s in raw_spks:
            romaji_name = to_romaji(s)
            if romaji_name and romaji_name.lower() != "none":
                display_spks.append(romaji_name)
                original_spks.append(s)
    except Exception:
        logger.exception("Load Error %s", mid)
        return None

    loaded_models[mid] = (hps, net, display_spks, original_spks)
    return loaded_models[mid]
def _model_speaker_id(hps, original_spks, display_index):
    """Map a position in the filtered speaker lists back to the model's speaker id.

    get_vits_model() drops placeholder speakers from the lists it returns, so
    a list position is smaller than the real index whenever a dropped entry
    precedes it. ``original_spks`` is an in-order subsequence of the raw
    speaker table, and a dropped name can never equal a kept one (the filter
    depends only on the name), so walking both in step recovers the index.
    """
    raw_spks = hps.speakers if hasattr(hps, 'speakers') else [f"Character {i}" for i in range(hps.data.n_speakers)]
    matched = 0  # number of kept speakers already passed
    for raw_id, raw_name in enumerate(raw_spks):
        if matched < len(original_spks) and raw_name == original_spks[matched]:
            if matched == display_index:
                return raw_id
            matched += 1
    raise ValueError(f"speaker index {display_index} not found in model speaker table")


def tts_execute(m_id, text, speaker_romaji, speed):
    """Synthesise *text* with model *m_id* and the selected speaker.

    Args:
        m_id: Model id understood by get_vits_model().
        text: Input text; two-letter language tags such as ``[JA]`` are removed.
        speaker_romaji: Romanised speaker name; when falsy the first listed
            speaker is used.
        speed: Speed factor; the model's ``length_scale`` is ``1.0 / speed``.

    Returns:
        ``(status, (sampling_rate, int16_samples))`` on success, otherwise
        ``(status, None)``. Exceptions raised during synthesis are logged and
        reported through *status*, never propagated.
    """
    data = get_vits_model(m_id)
    if not data:
        return "❌ Model Loading...", None
    hps, net, display_spks, original_spks = data
    if not speaker_romaji:
        if not display_spks:
            return "❌ No Speaker Selected", None
        speaker_romaji = display_spks[0]
    try:
        # The list position is not the speaker id once placeholders were filtered out.
        sid = _model_speaker_id(hps, original_spks, display_spks.index(speaker_romaji))
        from text import text_to_sequence
        clean_text = re.sub(r'\[[A-Z]{2}\]', '', text)
        cleaners = hps.data.text_cleaners if hasattr(hps.data, 'text_cleaners') else ['japanese_cleaners']
        seq = text_to_sequence(clean_text, hps.symbols, cleaners)
        if hps.data.add_blank:
            seq = commons.intersperse(seq, 0)
        with torch.no_grad():
            tokens = torch.LongTensor(seq).unsqueeze(0)
            token_count = torch.LongTensor([len(seq)])
            audio = net.infer(
                tokens,
                token_count,
                sid=torch.LongTensor([sid]),
                noise_scale=0.667,
                noise_scale_w=0.8,
                length_scale=1.0 / speed,
            )[0][0, 0].data.cpu().float().numpy()
        # Clip first: samples outside [-1, 1] would wrap around in the int16 cast.
        pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
        return "✅ Done!", (hps.data.sampling_rate, pcm)
    except Exception as e:
        # The status textbox is hidden in the UI, so the log is the only visible trace.
        logger.exception("TTS failed for model %s", m_id)
        return f"Error: {e}", None
# Sample sentences for the "random text" button. They were stored as
# mis-decoded UTF-8 and are restored here. Rough meanings, in order:
# "Hello!", "How are you?", "Good work today, Sensei!", "I love you!",
# "See you tomorrow."
_RANDOM_JP_PHRASES = (
    "こんにちは!",
    "お元気ですか?",
    "先生、お疲れ様です!",
    "大好きだよ!",
    "また明日ね。",
)


def get_random_jp():
    """Return one of the built-in Japanese sample sentences, chosen at random."""
    return random.choice(_RANDOM_JP_PHRASES)
def get_char_info_html(m_id):
    """Return the HTML grid listing the original speaker names of model *m_id*.

    The wrapper element gets the id ``info-content-<m_id>``. Each speaker is
    rendered as a card showing its name and its 1-based position in the list.
    Returns "" when the model cannot be loaded.
    """
    # Local import: the file has no top-level `html` import.
    from html import escape

    data = get_vits_model(m_id)
    if not data:
        return ""
    original_names = data[3]

    parts = [f"""<div class="info-content-area" id="info-content-{m_id}" style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 15px; background: white; border: 1px solid #f0f4f8; border-top: none; border-radius: 0 0 10px 10px;">"""]
    for position, name in enumerate(original_names, start=1):
        # Names come from the model's config file; escape them so characters
        # such as "<" or "&" cannot break the markup.
        safe_name = escape(str(name))
        parts.append(
            "\n"
            '<div style="border: 1px solid #f0f4f8; border-radius: 12px; padding: 10px; border-left: 5px solid #1299ff; background: #fff;">\n'
            f'<div style="font-weight: 800; color: #2d3748; font-size: 14px; margin-bottom: 2px;">{safe_name}</div>\n'
            f'<div style="color: #a0aec0; font-size: 11px;">Character {position}</div>\n'
            "</div>\n"
        )
    parts.append("</div>")
    return "".join(parts)
# --- 6. UI STYLE ---
# Stylesheet handed to gr.Blocks(css=...) when the UI is built below. The
# class names defined here are the ones referenced through `elem_classes`
# and in the raw HTML snippets of the UI. The /* ... */ remarks inside the
# string are part of the CSS text sent to the browser and are left untouched.
css = """
:root {
--primary-600: #1299ff !important;
--accent-600: #1299ff !important;
}
.gradio-container, .gradio-container * {
--loader-color: #A2D2FF !important;
}
.loading { border-top-color: #A2D2FF !important; }
svg.loading { fill: #A2D2FF !important; }
/* Judul Header */
.ba-header-container {
border: 1.5px solid #e1e8f0;
border-radius: 12px;
padding: 20px 10px;
margin-bottom: 12px;
background: white;
text-align: center;
}
.ba-header-container h1 {
color: #1299ff !important;
font-weight: 700 !important;
font-size: 42px !important;
margin: 0;
line-height: 1.1;
}
.ba-header-container p {
color: #8a99af;
font-size: 11px;
font-weight: 800;
letter-spacing: 2px;
margin: 8px 0 0 0;
text-transform: uppercase;
}
/* System Status Box */
.status-container {
border: 1.5px solid #e1e8f0;
border-radius: 12px;
padding: 15px 22px;
margin-bottom: 20px;
background: white;
}
.status-title {
color: #1299ff !important; /* Biru sesuai permintaan */
font-weight: 800;
font-size: 16px;
margin-bottom: 8px;
}
.status-item {
display: flex;
align-items: center;
gap: 10px;
font-size: 14px;
font-weight: 700;
margin-bottom: 4px;
}
.text-dark-gray {
color: #4a5568 !important; /* Abu-abu Hitam */
}
.text-green-bold {
color: #28a745 !important; /* Hijau Tebal */
font-weight: 900 !important;
text-shadow: none !important; /* Tanpa bayangan */
}
.text-blue-status {
color: #1299ff !important; /* Biru */
}
.status-dot {
width: 8px;
height: 8px;
background-color: #1299ff;
border-radius: 50%;
}
.pulse-dot {
width: 8px;
height: 8px;
background-color: #1299ff;
border-radius: 50%;
position: relative;
}
.pulse-dot::after {
content: "";
position: absolute;
width: 100%;
height: 100%;
background-color: #1299ff;
border-radius: 50%;
animation: pulse 2.5s infinite;
}
@keyframes pulse {
0% { transform: scale(1); opacity: 0.8; }
100% { transform: scale(3); opacity: 0; }
}
.slim-card { max-width: 480px; margin: 0 auto; background: transparent; padding: 10px; }
.tabs > .tab-nav {
display: flex !important;
overflow-x: auto !important;
white-space: nowrap !important;
flex-wrap: nowrap !important;
}
.tabs > .tab-nav button { flex: 0 0 auto !important; }
.scroll-box { height: 200px; overflow-y: auto; border: 1px solid #f0f4f8; border-radius: 12px; padding: 10px; background: #fafbfc; margin-bottom: 10px; }
.char-btn { background: white !important; border: 1px solid #e2e8f0 !important; border-left: 5px solid #1299ff !important; text-align: left !important; padding: 8px !important; font-size: 12px !important; margin-bottom: 4px !important; width: 100%; color: #4a5568 !important; }
.char-btn:hover { background: #f0f7ff !important; border-color: #1299ff !important; }
.warning-card { background: #fff9f0; border: 2px dashed #f5a623; border-radius: 10px; padding: 12px; margin-bottom: 15px; text-align: center; }
.warning-title { color: #f5a623; font-weight: 800; font-size: 13px; margin-bottom: 4px; display: flex; align-items: center; justify-content: center; gap: 5px; }
.warning-text { color: #855d1a; font-size: 11px; line-height: 1.5; font-weight: 600; }
.jp-btn { background: #f8fafc !important; border: 1px solid #cbd5e1 !important; color: #475569 !important; font-weight: 700 !important; border-radius: 10px !important; margin-bottom: 10px; font-size: 12px !important; width: 100%; }
.gen-btn { background: #1299ff !important; color: white !important; font-weight: 700 !important; border-radius: 12px !important; height: 45px !important; width: 100%; border: none !important; cursor: pointer; transition: 0.3s; }
.info-header-custom { background: #1299ff !important; color: white !important; border: none !important; border-radius: 8px 8px 0 0 !important; padding: 12px 15px !important; width: 100% !important; cursor: pointer; display: flex !important; justify-content: center !important; align-items: center !important; font-weight: 800 !important; font-size: 14px !important; margin-top: 15px; gap: 15px; }
.credit-footer { margin-top: 25px; padding: 15px; background: white; border-radius: 12px; text-align: center; border: 1px solid #eef2f6; border-bottom: 4px solid #1299ff; color: #94a3b8; font-weight: 700; font-size: 12px; letter-spacing: 2px; }
"""
# Build the Gradio UI: a header, one tab per CLEAN_INFO entry, and a footer.
# The emoji and CJK text in the labels had been stored as mis-decoded UTF-8
# (mojibake) and are restored here; the block nesting is restored as well.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_classes="slim-card"):
        # Header (Blue Archive styling) followed by a static status panel.
        gr.HTML("""
<div class="ba-header-container">
<h1>Library Anime</h1>
<p>🍂 Style-Bert-VITS2 🍂</p>
</div>
<div class="status-container">
<div class="status-title">System Status</div>
<div class="status-item">
<div class="status-dot"></div>
<span class="text-dark-gray">Model :</span>
<span class="text-green-bold">&nbsp;LOADED ✅</span>
</div>
<div class="status-item">
<div class="pulse-dot"></div>
<span class="text-dark-gray">System :</span>
<span class="text-blue-status">&nbsp;Online</span>
</div>
</div>
""")
        with gr.Tabs(elem_classes="tabs"):
            # Tabs appear in numeric model-id order.
            for m_id in sorted(CLEAN_INFO, key=int):
                with gr.Tab(f"Model {m_id}"):
                    gr.Markdown(f"### 📂 {CLEAN_INFO[m_id]['title']}")
                    # Models are loaded while the page is being built; a model
                    # that fails to load simply gets an empty character list.
                    m_data = get_vits_model(m_id)
                    chars = m_data[2] if m_data else []
                    m_p = os.path.join(LOCAL_ROOT, str(m_id))
                    # First existing cover.<ext> in the model folder, if any.
                    cov = next((os.path.join(m_p, f"cover.{e}") for e in ['png', 'jpg', 'jpeg', 'webp'] if os.path.exists(os.path.join(m_p, f"cover.{e}"))), None)
                    if cov:
                        gr.Image(cov, show_label=False, interactive=False, height=140)
                    # Per-tab state: the romanised name of the selected speaker.
                    sel_name = gr.State("")
                    # NOTE(review): the leading glyph here and in the "Selected"
                    # label below was rebuilt from a byte sequence whose last
                    # byte was lost; 📍 is the best guess - confirm.
                    char_display = gr.Markdown("📍 *Silakan pilih karakter...*")
                    gr.HTML("<p style='font-weight:800; font-size:11px; color:#8a99af; margin-bottom:8px;'>CHARACTER LIST (ROMAJI)</p>")
                    with gr.Column(elem_classes="scroll-box"):
                        if chars:
                            for name in chars:
                                btn = gr.Button(f"👤 {name}", elem_classes="char-btn")
                                # `n=name` binds the current name; a plain closure
                                # would see only the last loop value.
                                btn.click(fn=lambda n=name: (n, f"📍 Selected: **{n}**"), outputs=[sel_name, char_display])
                    gr.HTML("""
<div class="warning-card">
<div class="warning-title">🔖 PERINGATAN MINNA 🔖</div>
<div class="warning-text">
Model 19 is not working. Cara pakai VITS ini klik aja character lalu masukkan input text dan Generate Voice! Done ✨
</div>
</div>
""")
                    # Language tags such as [JA] are stripped from the example text.
                    ex_text = re.sub(r'\[[A-Z]{2}\]', '', CLEAN_INFO[m_id].get("example", "こんにちは。"))
                    txt_in = gr.TextArea(label="Input Text", value=ex_text, lines=3)
                    gr.Button("🎲 INPUTS RANDOM TEXT 🎲", elem_classes="jp-btn").click(get_random_jp, outputs=[txt_in])
                    spd = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Speed Audio")
                    # NOTE(review): glyph rebuilt from a partially lost byte
                    # sequence; 🎐 is the best guess - confirm.
                    btn_gen = gr.Button("🎐 GENERATE VOICE 🎐", elem_classes="gen-btn")
                    aud_out = gr.Audio(label="Voice Output")
                    # Plain HTML button that toggles the character-info grid
                    # rendered just below it (element id info-content-<m_id>).
                    gr.HTML(f"""
<button onclick="
const content = document.getElementById('info-content-{m_id}');
content.style.display = (content.style.display === 'none') ? 'grid' : 'none';
" class="info-header-custom">
📑 Character Information 📑 &nbsp; ▼
</button>
""")
                    gr.HTML(get_char_info_html(m_id))
                    # Hidden sink for the status string returned by tts_execute().
                    status_log = gr.Textbox(visible=False)
                    btn_gen.click(fn=tts_execute, inputs=[gr.State(m_id), txt_in, sel_name, spd], outputs=[status_log, aud_out])
        gr.HTML("""<div class="credit-footer">🌥️ CREATED BY MUTSUMI 🌥️</div>""")
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()