Spaces:
Running
Running
| import json | |
| import os | |
| import re | |
| import logging | |
| import random | |
| import shutil | |
| import numpy as np | |
| import torch | |
| from torch import no_grad, LongTensor | |
| import commons | |
| import utils | |
| import gradio as gr | |
| from models import SynthesizerTrn | |
| from text import text_to_sequence, _clean_text | |
| from huggingface_hub import hf_hub_download | |
# --- 1. SETUP LOGGING ---
# Root logger: INFO level, "timestamp - level - message" records.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# --- 2. CONFIGURATION & ASSETS ---
# Hub access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Repository and subfolder the model assets are downloaded from.
REPO_ID = "Plana-Archive/Plana-TTS"
SUBFOLDER = "Prosekai-TTS/saved_model"
# Inference is pinned to the CPU.
device = torch.device("cpu")
# --- 3. ROMAJI CONVERTER ---
# pykakasi is an optional dependency. When it cannot be imported or
# initialised the app keeps running and names are passed through unchanged.
try:
    import pykakasi

    kks = pykakasi.kakasi()
except Exception:
    # Deliberately best-effort, but no longer a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit still propagate; the reason is logged.
    logging.getLogger(__name__).warning(
        "pykakasi unavailable; names will not be romanised", exc_info=True
    )
    kks = None


def to_romaji(text):
    """Return a romanised display form of ``text``.

    Without a working pykakasi converter the result is simply ``str(text)``.
    With one, empty values and the literal string ``"None"`` map to ``""``;
    otherwise every converted item's ``'hepburn'`` reading is capitalised and
    the pieces are concatenated. If the conversion itself fails, ``str(text)``
    is returned.
    """
    if kks is None:
        return str(text)
    if not text or text == "None":
        return ""
    try:
        converted = kks.convert(str(text))
        return "".join(item['hepburn'].capitalize() for item in converted)
    except Exception:
        # Fall back to the raw text rather than failing, but leave a trace.
        logging.getLogger(__name__).warning(
            "Romaji conversion failed for %r", text, exc_info=True
        )
        return str(text)
# --- 4. LOADING MODEL ---
logger.info("[*] Downloading Project Sekai model assets...")


def _fetch_asset(filename):
    """Download ``filename`` from REPO_ID/SUBFOLDER and return the hub result."""
    return hf_hub_download(repo_id=REPO_ID, filename=filename, subfolder=SUBFOLDER, token=HF_TOKEN)


config_path = _fetch_asset("config.json")
model_path = _fetch_asset("model.pth")
cover_path = _fetch_asset("cover.png")

# Hyper-parameters come from the downloaded config and drive model construction.
hps = utils.get_hparams_from_file(config_path)

model = SynthesizerTrn(
    len(hps.symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,
    **hps.model,
)
model = model.to(device)
# Restore weights (no optimizer is passed) and switch to evaluation mode.
utils.load_checkpoint(model_path, model, None)
model.eval()

# Speakers offered in the UI: every configured name except the "None" entry.
speaker_names = [speaker for speaker in hps.speakers if speaker != "None"]
display_names = list(map(to_romaji, speaker_names))
# Display name -> original speaker name, used to resolve the UI selection.
speaker_map = dict(zip(display_names, speaker_names))
# --- 5. LOGIC FUNCTIONS ---
def get_text(text, hps, is_phoneme):
    """Convert ``text`` into a ``LongTensor`` of symbol ids.

    When ``is_phoneme`` is true no text cleaners are applied; otherwise the
    cleaners listed in ``hps.data.text_cleaners`` are used. If
    ``hps.data.add_blank`` is set, a 0 is interspersed through the sequence.
    """
    if is_phoneme:
        cleaners = []
    else:
        cleaners = hps.data.text_cleaners
    sequence = text_to_sequence(text, hps.symbols, cleaners)
    if hps.data.add_blank:
        sequence = commons.intersperse(sequence, 0)
    return LongTensor(sequence)
def tts_execute(text, speaker_romaji, speed, is_phoneme):
    """Synthesize ``text`` with the selected speaker.

    Args:
        text: Input text (or phoneme string when ``is_phoneme`` is true).
        speaker_romaji: Display name of the speaker, a key of ``speaker_map``.
        speed: Speed factor; the model receives ``length_scale = 1.0 / speed``.
        is_phoneme: Passed to ``get_text`` to skip text cleaning.

    Returns:
        ``(status_message, audio)`` where ``audio`` is
        ``(sampling_rate, int16 ndarray)`` on success and ``None`` when no
        speaker is selected or synthesis fails.
    """
    if not speaker_romaji:
        return "β Pilih Karakter dulu!", None
    try:
        original_name = speaker_map[speaker_romaji]
        speaker_id = hps.speakers.index(original_name)
        stn_tst = get_text(text, hps, is_phoneme)
        with no_grad():
            x_tst = stn_tst.unsqueeze(0).to(device)
            x_tst_lengths = LongTensor([stn_tst.size(0)]).to(device)
            sid = LongTensor([speaker_id]).to(device)
            # NOTE(review): assumes infer()[0] is indexed as [batch, channel, samples] — confirm.
            audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667,
                                noise_scale_w=0.8, length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
        # Clip before scaling: samples outside [-1, 1] would otherwise wrap
        # around in the int16 cast and come out as loud artefacts.
        pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
        return "β Success!", (hps.data.sampling_rate, pcm)
    except Exception as e:
        # Keep the UI responsive, but record the traceback for debugging.
        logger.exception("TTS synthesis failed")
        return f"Error: {e}", None
def get_random_jp():
    """Return one of the built-in sample sentences, picked at random."""
    samples = [
        "γγγ«γ‘γ―οΌ",
        "γε ζ°γ§γγοΌ",
        "γ―γ³γγ·γ§γζι«οΌ",
        "η·΄ηΏγι εΌ΅γγγγ",
        "γΎγζζ₯γδΌγγγγͺοΌ",
    ]
    return random.choice(samples)
def to_phoneme_fn(text):
    """Run ``text`` through the configured text cleaners; "" stays ""."""
    if text == "":
        return ""
    return _clean_text(text, hps.data.text_cleaners)
# --- 6. UI STYLE ---
# Stylesheet passed to gr.Blocks(css=...). It overrides theme colour
# variables and the loading overlay, and defines the classes used by the
# interface through elem_classes and inline HTML (slim-card, scroll-box,
# char-btn, jp-btn, gen-btn, warning-card, credit-footer, ...).
css = """
:root {
--primary-600: #1299ff !important;
--accent-600: #1299ff !important;
--loader-color: #add8e6 !important; /* Warna Biru Muda untuk Loading */
}
/* Modifikasi Loading Screen Gradio */
.gradio-container .load-overlay {
background: rgba(255, 255, 255, 0.8) !important;
}
.gradio-container .loader {
border-top-color: #add8e6 !important;
}
.ba-header-container { border: 1.5px solid #e1e8f0; border-radius: 12px; padding: 20px 10px; margin-bottom: 12px; background: white; text-align: center; }
.ba-header-container h1 { color: #1299ff !important; font-weight: 700 !important; font-size: 36px !important; margin: 0; }
.status-container { border: 1.5px solid #e1e8f0; border-radius: 12px; padding: 15px 22px; margin-bottom: 20px; background: white; }
.status-title { color: #1299ff !important; font-weight: 800; font-size: 16px; margin-bottom: 8px; }
.text-green-bold { color: #28a745 !important; font-weight: 900 !important; }
.text-blue-status { color: #1299ff !important; }
.slim-card { max-width: 500px; margin: 0 auto; background: transparent; padding: 10px; }
.scroll-box { height: 220px; overflow-y: auto; border: 1px solid #f0f4f8; border-radius: 12px; padding: 10px; background: #fafbfc; margin-bottom: 10px; }
.char-btn { background: white !important; border: 1px solid #e2e8f0 !important; border-left: 5px solid #1299ff !important; text-align: left !important; padding: 8px !important; font-size: 12px !important; margin-bottom: 4px !important; width: 100%; color: #4a5568 !important; }
.warning-card { background: #fff9f0; border: 2px dashed #f5a623; border-radius: 10px; padding: 12px; margin-bottom: 15px; text-align: center; }
.jp-btn { background: #f8fafc !important; border: 1px solid #cbd5e1 !important; color: #475569 !important; font-weight: 700 !important; border-radius: 10px !important; margin-bottom: 10px; font-size: 12px !important; width: 100%; }
.gen-btn { background: #1299ff !important; color: white !important; font-weight: 700 !important; border-radius: 12px !important; height: 45px !important; width: 100%; border: none !important; cursor: pointer; }
.credit-footer { margin-top: 25px; padding: 15px; background: white; border-radius: 12px; text-align: center; border-bottom: 4px solid #1299ff; color: #94a3b8; font-weight: 700; font-size: 12px; letter-spacing: 2px; }
"""
# --- 7. GRADIO INTERFACE ---
# NOTE(review): container nesting below was reconstructed from a copy whose
# indentation was lost — confirm the layout against the running app.
with gr.Blocks(title="Project Sekai TTS", css=css) as app:
    # Page wrapper styled by the "slim-card" CSS class.
    with gr.Column(elem_classes="slim-card"):
        # Static header and status panel (the f-string has no placeholders).
        gr.HTML(f"""
<div class="ba-header-container">
<h1>Project Sekai</h1>
<p>π« VITS Emotional TTS π«</p>
</div>
<div class="status-container">
<div class="status-title">System Status</div>
<div class="status-item"><span style="color:#4a5568">Model :</span> <span class="text-green-bold"> ProSekai Pack β </span></div>
<div class="status-item"><span style="color:#4a5568">Device :</span> <span class="text-blue-status"> CPU Mode</span></div>
</div>
""")
        # Show the downloaded cover image only if the file exists locally.
        if os.path.exists(cover_path):
            gr.Image(cover_path, show_label=False, interactive=False, height=160)
        # sel_name stores the display name of the chosen speaker; it is later
        # passed to tts_execute, which looks it up in speaker_map.
        sel_name = gr.State("")
        char_display = gr.Markdown("π *Silakan pilih karakter...*")
        # One button per speaker inside the "scroll-box" column.
        with gr.Column(elem_classes="scroll-box"):
            for name in display_names:
                btn = gr.Button(f"π€ {name}", elem_classes="char-btn")
                # n=name binds this button's own name as a default argument,
                # avoiding the late-binding closure pitfall inside the loop.
                btn.click(fn=lambda n=name: (n, f"π Selected: **{n}**"), outputs=[sel_name, char_display])
        with gr.Column():
            # Usage instructions card.
            gr.HTML("""
<div class="warning-card">
<div style="color:#f5a623;font-weight:800;font-size:13px;">π INSTRUKSI π</div>
<div style="color:#855d1a;font-size:11px;font-weight:600;">
Pilih member Proseka, tulis teks Jepang, lalu klik Generate!
</div>
</div>
""")
            txt_in = gr.TextArea(label="Input Text", value="γγγ«γ‘γ―γ", lines=3)
            # Replaces the input text with a random built-in sample sentence.
            gr.Button("π² RANDOM JAPANESE TEXT π²", elem_classes="jp-btn").click(get_random_jp, outputs=[txt_in])
            # Speed factor forwarded to tts_execute (0.5 to 2.0, default 1.0).
            speed = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Speed Audio")
            with gr.Accordion(label="Advanced Options", open=False):
                phoneme_input = gr.Checkbox(value=False, label="Phoneme input")
                to_phoneme_btn = gr.Button("Convert text to phoneme")
                # Lists every symbol in hps.symbols; no click handler is
                # attached to this dataset in this section.
                phoneme_list = gr.Dataset(
                    label="Phoneme list",
                    components=[txt_in],
                    samples=[[x] for x in hps.symbols]
                )
                # Rewrites the input box in place with the cleaned text.
                to_phoneme_btn.click(to_phoneme_fn, [txt_in], [txt_in])
            btn_gen = gr.Button("π GENERATE VOICE π", elem_classes="gen-btn")
            status_out = gr.Textbox(label="Status Message", interactive=False)
            aud_out = gr.Audio(label="Voice Output")
            # tts_execute returns (status message, audio), matching the outputs.
            btn_gen.click(
                fn=tts_execute,
                inputs=[txt_in, sel_name, speed, phoneme_input],
                outputs=[status_out, aud_out]
            )
        # Footer credit line.
        gr.HTML("""<div class="credit-footer">π₯ CREATED BY MUTSUMI π₯</div>""")
if __name__ == "__main__":
    # Launch settings: bind to 0.0.0.0 on port 7860 with ssr_mode turned off.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "ssr_mode": False,
    }
    app.launch(**launch_options)