import json
import os
import re
import logging
import random
import shutil
import numpy as np
import torch
from torch import no_grad, LongTensor
import commons
import utils
import gradio as gr
from models import SynthesizerTrn
from text import text_to_sequence, _clean_text
from huggingface_hub import hf_hub_download
# --- 1. SETUP LOGGING ---
# Configure logging once at import time: INFO and above, with each record
# showing its timestamp, level and message.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# --- 2. CONFIGURATION & ASSETS ---
# Access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Hub repository, and the sub-folder inside it, that the assets are fetched from.
REPO_ID = "Plana-Archive/Plana-TTS"
SUBFOLDER = "Prosekai-TTS/saved_model"
# The model and all input tensors are placed on the CPU.
device = torch.device("cpu")
# --- 3. ROMAJI CONVERTER ---
# pykakasi is treated as optional: if it cannot be imported or initialised,
# names are displayed unconverted instead of aborting start-up.
try:
    import pykakasi
    kks = pykakasi.kakasi()
except Exception as exc:  # best-effort, but no longer traps SystemExit/KeyboardInterrupt
    kks = None
    logging.getLogger(__name__).warning(
        "pykakasi unavailable, names will not be romanised: %s", exc
    )


def to_romaji(text):
    """Return a romanised display form of *text*.

    Args:
        text: Value to convert; it is passed through ``str()`` first.

    Returns:
        ``""`` when *text* is falsy or the literal string ``"None"``.
        Otherwise the capitalised ``'hepburn'`` value of every segment
        returned by ``kks.convert``, joined without a separator. When the
        converter is unavailable or the conversion raises, ``str(text)``.
    """
    # Same empty/"None" handling whether or not pykakasi is present, so the
    # fallback never turns None into the visible string "None".
    if not text or text == "None":
        return ""
    if kks is None:
        return str(text)
    try:
        segments = kks.convert(str(text))
        return "".join(segment['hepburn'].capitalize() for segment in segments)
    except Exception:
        # Conversion is cosmetic only; fall back to the raw text.
        return str(text)
# --- 4. LOADING MODEL ---
logger.info("[*] Downloading Project Sekai model assets...")


def _fetch_asset(filename):
    """Fetch *filename* from SUBFOLDER of REPO_ID and return hf_hub_download's result."""
    return hf_hub_download(
        repo_id=REPO_ID,
        filename=filename,
        subfolder=SUBFOLDER,
        token=HF_TOKEN,
    )


config_path = _fetch_asset("config.json")
model_path = _fetch_asset("model.pth")
cover_path = _fetch_asset("cover.png")

# Hyper-parameters parsed from the downloaded config file.
hps = utils.get_hparams_from_file(config_path)

# Build the synthesizer from the hyper-parameters, then move it to `device`.
# NOTE(review): the three positional arguments are presumably vocabulary size,
# spectrogram bin count and segment length in frames - confirm against models.py.
model = SynthesizerTrn(
    len(hps.symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,
    **hps.model,
)
model = model.to(device)

# Restore the weights (None is passed as the third argument) and switch to eval mode.
utils.load_checkpoint(model_path, model, None)
model.eval()

# Speakers offered in the UI: every configured entry except the "None" placeholder.
speaker_names = [speaker for speaker in hps.speakers if speaker != "None"]
# Romanised labels shown on the buttons, in the same order as speaker_names.
display_names = list(map(to_romaji, speaker_names))
# Label -> original speaker name, used to resolve a clicked button.
speaker_map = dict(zip(display_names, speaker_names))
# --- 5. LOGIC FUNCTIONS ---
def get_text(text, hps, is_phoneme):
    """Encode *text* as a ``LongTensor`` of symbol ids.

    Args:
        text: Input string handed to ``text_to_sequence``.
        hps: Hyper-parameter object; ``hps.symbols``, ``hps.data.text_cleaners``
            and ``hps.data.add_blank`` are read.
        is_phoneme: When truthy an empty cleaner list is used; otherwise the
            cleaners configured in ``hps.data.text_cleaners``.

    Returns:
        ``LongTensor`` built from the id sequence. If ``hps.data.add_blank``
        is set, the sequence is first passed through
        ``commons.intersperse(sequence, 0)``.
    """
    if is_phoneme:
        cleaners = []
    else:
        cleaners = hps.data.text_cleaners

    sequence = text_to_sequence(text, hps.symbols, cleaners)
    if hps.data.add_blank:
        sequence = commons.intersperse(sequence, 0)
    return LongTensor(sequence)
def tts_execute(text, speaker_romaji, speed, is_phoneme):
    """Synthesize *text* with the selected speaker and return a status plus audio.

    Args:
        text: Text to speak (treated as phonemes when *is_phoneme* is truthy).
        speaker_romaji: Display name chosen in the UI; must be a key of
            ``speaker_map``. Falsy means nothing was selected.
        speed: Speed factor; the model receives ``length_scale = 1.0 / speed``.
        is_phoneme: Forwarded to ``get_text``.

    Returns:
        ``(status_message, audio)``. On success *audio* is
        ``(hps.data.sampling_rate, int16 ndarray)``; when no speaker is
        selected or any step raises, *audio* is ``None`` and the message
        describes the problem.
    """
    # NOTE(review): both status emoji were restored from mis-decoded text in
    # the source; the failure glyph is a best guess - confirm against upstream.
    if not speaker_romaji:
        return "❌ Pilih Karakter dulu!", None
    try:
        original_name = speaker_map[speaker_romaji]
        speaker_id = hps.speakers.index(original_name)
        stn_tst = get_text(text, hps, is_phoneme)
        with no_grad():
            x_tst = stn_tst.unsqueeze(0).to(device)
            x_tst_lengths = LongTensor([stn_tst.size(0)]).to(device)
            sid = LongTensor([speaker_id]).to(device)
            audio = model.infer(
                x_tst,
                x_tst_lengths,
                sid=sid,
                noise_scale=.667,
                noise_scale_w=0.8,
                length_scale=1.0 / speed,
            )[0][0, 0].data.cpu().float().numpy()
        # Clamp before scaling so an out-of-range sample cannot wrap around
        # when cast to int16.
        pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
        return "✅ Success!", (hps.data.sampling_rate, pcm)
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the server log instead of discarding it.
        logger.exception("TTS generation failed")
        return f"Error: {e}", None
# Sample sentences offered by the "random Japanese text" button.
# NOTE(review): restored from mis-decoded text (UTF-8 read as a Greek
# single-byte code page, with some bytes lost). A few kana could not be
# recovered unambiguously and were filled in from context - confirm against
# the upstream file.
_SAMPLE_JP_SENTENCES = (
    "こんにちは！",
    "お元気ですか？",
    "ワンダショが最高！",
    "練習を頑張ろうよ。",
    "また明日も会いたいな！",
)


def get_random_jp():
    """Return one Japanese sample sentence picked at random."""
    return random.choice(_SAMPLE_JP_SENTENCES)
def to_phoneme_fn(text):
    """Run *text* through the configured text cleaners.

    Args:
        text: Contents of the input box.

    Returns:
        ``""`` when *text* is empty or ``None``; otherwise the result of
        ``_clean_text(text, hps.data.text_cleaners)``.
    """
    # Falsy check rather than `!= ""`, so None is not passed to the cleaners.
    if not text:
        return ""
    return _clean_text(text, hps.data.text_cleaners)
# --- 6. UI STYLE ---
# Stylesheet passed to gr.Blocks(css=...). It overrides the theme's primary and
# accent colours, restyles the loading overlay/spinner, and defines the classes
# referenced by the interface through elem_classes and inline HTML
# (slim-card, scroll-box, char-btn, jp-btn, gen-btn, warning-card, ...).
# The two CSS comments inside the string are Indonesian: "light blue colour for
# loading" and "modify the Gradio loading screen".
css = """
:root {
--primary-600: #1299ff !important;
--accent-600: #1299ff !important;
--loader-color: #add8e6 !important; /* Warna Biru Muda untuk Loading */
}
/* Modifikasi Loading Screen Gradio */
.gradio-container .load-overlay {
background: rgba(255, 255, 255, 0.8) !important;
}
.gradio-container .loader {
border-top-color: #add8e6 !important;
}
.ba-header-container { border: 1.5px solid #e1e8f0; border-radius: 12px; padding: 20px 10px; margin-bottom: 12px; background: white; text-align: center; }
.ba-header-container h1 { color: #1299ff !important; font-weight: 700 !important; font-size: 36px !important; margin: 0; }
.status-container { border: 1.5px solid #e1e8f0; border-radius: 12px; padding: 15px 22px; margin-bottom: 20px; background: white; }
.status-title { color: #1299ff !important; font-weight: 800; font-size: 16px; margin-bottom: 8px; }
.text-green-bold { color: #28a745 !important; font-weight: 900 !important; }
.text-blue-status { color: #1299ff !important; }
.slim-card { max-width: 500px; margin: 0 auto; background: transparent; padding: 10px; }
.scroll-box { height: 220px; overflow-y: auto; border: 1px solid #f0f4f8; border-radius: 12px; padding: 10px; background: #fafbfc; margin-bottom: 10px; }
.char-btn { background: white !important; border: 1px solid #e2e8f0 !important; border-left: 5px solid #1299ff !important; text-align: left !important; padding: 8px !important; font-size: 12px !important; margin-bottom: 4px !important; width: 100%; color: #4a5568 !important; }
.warning-card { background: #fff9f0; border: 2px dashed #f5a623; border-radius: 10px; padding: 12px; margin-bottom: 15px; text-align: center; }
.jp-btn { background: #f8fafc !important; border: 1px solid #cbd5e1 !important; color: #475569 !important; font-weight: 700 !important; border-radius: 10px !important; margin-bottom: 10px; font-size: 12px !important; width: 100%; }
.gen-btn { background: #1299ff !important; color: white !important; font-weight: 700 !important; border-radius: 12px !important; height: 45px !important; width: 100%; border: none !important; cursor: pointer; }
.credit-footer { margin-top: 25px; padding: 15px; background: white; border-radius: 12px; text-align: center; border-bottom: 4px solid #1299ff; color: #94a3b8; font-weight: 700; font-size: 12px; letter-spacing: 2px; }
"""
# --- 7. GRADIO INTERFACE ---
# Single-column layout: header/status panel, optional cover image, scrollable
# speaker picker, input controls, advanced phoneme options, output widgets,
# and a credit footer.
# NOTE(review): the source had lost its indentation and its non-ASCII text was
# mis-decoded. The nesting below and several emoji are best-effort
# reconstructions - confirm against the upstream file.
with gr.Blocks(title="Project Sekai TTS", css=css) as app:
    with gr.Column(elem_classes="slim-card"):
        # Static header and status panel (plain string: it has no placeholders).
        gr.HTML("""
<div class="ba-header-container">
<h1>Project Sekai</h1>
<p>💫 VITS Emotional TTS 💫</p>
</div>
<div class="status-container">
<div class="status-title">System Status</div>
<div class="status-item"><span style="color:#4a5568">Model :</span> <span class="text-green-bold"> ProSekai Pack ✅</span></div>
<div class="status-item"><span style="color:#4a5568">Device :</span> <span class="text-blue-status"> CPU Mode</span></div>
</div>
""")

        # The cover image is shown only if the downloaded file exists.
        if os.path.exists(cover_path):
            gr.Image(cover_path, show_label=False, interactive=False, height=160)

        # Selected speaker (display name) kept in session state, plus a label
        # that echoes the current choice.
        sel_name = gr.State("")
        char_display = gr.Markdown("📌 *Silakan pilih karakter...*")

        # One button per speaker. `n=name` binds the current loop value as a
        # default argument so every button reports its own name.
        with gr.Column(elem_classes="scroll-box"):
            for name in display_names:
                btn = gr.Button(f"👤 {name}", elem_classes="char-btn")
                btn.click(
                    fn=lambda n=name: (n, f"📌 Selected: **{n}**"),
                    outputs=[sel_name, char_display],
                )

        with gr.Column():
            gr.HTML("""
<div class="warning-card">
<div style="color:#f5a623;font-weight:800;font-size:13px;">📝 INSTRUKSI 📝</div>
<div style="color:#855d1a;font-size:11px;font-weight:600;">
Pilih member Proseka, tulis teks Jepang, lalu klik Generate!
</div>
</div>
""")
            txt_in = gr.TextArea(label="Input Text", value="こんにちは。", lines=3)
            # Replaces the input text with a random sample sentence.
            gr.Button("🎲 RANDOM JAPANESE TEXT 🎲", elem_classes="jp-btn").click(
                get_random_jp, outputs=[txt_in]
            )
            speed = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Speed Audio")

            with gr.Accordion(label="Advanced Options", open=False):
                phoneme_input = gr.Checkbox(value=False, label="Phoneme input")
                to_phoneme_btn = gr.Button("Convert text to phoneme")
                # Lists every model symbol as a sample for the input box.
                phoneme_list = gr.Dataset(
                    label="Phoneme list",
                    components=[txt_in],
                    samples=[[x] for x in hps.symbols],
                )
                # Rewrites the input box in place with its cleaned form.
                to_phoneme_btn.click(to_phoneme_fn, [txt_in], [txt_in])

            btn_gen = gr.Button("🔊 GENERATE VOICE 🔊", elem_classes="gen-btn")
            status_out = gr.Textbox(label="Status Message", interactive=False)
            aud_out = gr.Audio(label="Voice Output")
            btn_gen.click(
                fn=tts_execute,
                inputs=[txt_in, sel_name, speed, phoneme_input],
                outputs=[status_out, aud_out],
            )

        gr.HTML("""<div class="credit-footer">🔥 CREATED BY MUTSUMI 🔥</div>""")
if __name__ == "__main__":
    # Start the Gradio server only when run as a script: bind to 0.0.0.0 on
    # port 7860, with ssr_mode disabled.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )