| import gradio as gr |
| import os |
| import torch |
| import commons |
| import utils |
| from models import SynthesizerTrn |
| import numpy as np |
| import json |
| import shutil |
| from huggingface_hub import snapshot_download |
|
|
| |
try:
    import pykakasi

    kks = pykakasi.kakasi()

    def to_romaji(text):
        """Return a capitalised Hepburn romanisation of ``text``.

        Falsy input is returned untouched. Each segment produced by
        ``kks.convert`` contributes its ``'hepburn'`` field, capitalised, and
        the pieces are concatenated. The stringified input is returned
        instead when the result is empty or differs from it only by case.
        """
        if not text:
            return text
        source = str(text)
        pieces = (chunk['hepburn'].capitalize() for chunk in kks.convert(source))
        converted = "".join(pieces)
        if not converted or converted.lower() == source.lower():
            return source
        return converted
except ImportError:
    def to_romaji(text):
        """Fallback when pykakasi cannot be imported: return ``str(text)``."""
        return str(text)
|
|
| |
# Repository id passed to snapshot_download() when fetching info.json and
# the model folders.
REPO_ID = "Plana-Archive/Plana-TTS"

# Local directory that holds info.json and one sub-folder per model id.
LOCAL_ROOT = "saved_model"
|
|
def download_assets():
    """Download info.json and the model folders on first run (best effort).

    Nothing happens when ``LOCAL_ROOT/info.json`` already exists. Otherwise:

    1. ``info.json`` is downloaded into the working directory and moved to
       ``LOCAL_ROOT/info.json``.
    2. Everything matching ``MOE-TTS/saved_model/*`` is downloaded into
       ``LOCAL_ROOT`` and each entry of the resulting
       ``LOCAL_ROOT/MOE-TTS/saved_model`` folder is moved directly under
       ``LOCAL_ROOT``; the ``LOCAL_ROOT/MOE-TTS`` folder is then removed.

    Any exception raised during these steps is printed and swallowed, so a
    failed download does not stop the application from starting.
    """
    info_target = os.path.join(LOCAL_ROOT, "info.json")
    if os.path.exists(info_target):
        return

    try:
        snapshot_download(repo_id=REPO_ID, local_dir=".", allow_patterns=["info.json"])
        if os.path.exists("info.json"):
            os.makedirs(LOCAL_ROOT, exist_ok=True)
            shutil.move("info.json", info_target)

        snapshot_download(repo_id=REPO_ID, local_dir=LOCAL_ROOT, allow_patterns=["MOE-TTS/saved_model/*"])
        staging_root = os.path.join(LOCAL_ROOT, "MOE-TTS")
        nested = os.path.join(staging_root, "saved_model")
        if os.path.exists(nested):
            for item in os.listdir(nested):
                target = os.path.join(LOCAL_ROOT, item)
                # shutil.move() places the source *inside* a destination that
                # is an existing directory, which would produce e.g.
                # saved_model/1/1 when a previous run was interrupted. Remove
                # the stale destination first (never the staging folder that
                # still contains the files being moved).
                if target != staging_root:
                    if os.path.isdir(target) and not os.path.islink(target):
                        shutil.rmtree(target)
                    elif os.path.lexists(target):
                        os.remove(target)
                shutil.move(os.path.join(nested, item), target)
            shutil.rmtree(staging_root)
    except Exception as e:
        # Deliberate best effort: report the problem and carry on.
        print(f"Download error: {str(e)}")


# Fetch the assets at import time so the UI below can read them.
download_assets()
|
|
| |
# Cache of successfully loaded models, keyed by the model id as a string.
# get_vits_model() stores (hps, net, display_spks, raw_spks) tuples here.
loaded_models = {}
|
|
def clean_config(conf):
    """Recursively convert every dictionary key in ``conf`` to a string.

    Dicts are rebuilt with ``str(key)`` keys and lists are rebuilt element by
    element; any other value is returned unchanged. This is required for
    models with many speakers (such as Model 19) so the 'int' error does not
    occur.
    """
    if isinstance(conf, list):
        return [clean_config(entry) for entry in conf]
    if not isinstance(conf, dict):
        return conf

    cleaned = {}
    for key, value in conf.items():
        cleaned[str(key)] = clean_config(value)
    return cleaned
|
|
def get_vits_model(m_id):
    """Load the model stored under ``LOCAL_ROOT/<m_id>``, caching the result.

    Args:
        m_id: Model identifier; it is converted to ``str`` and used both as
            the folder name and as the cache key in ``loaded_models``.

    Returns:
        A tuple ``(hps, net, display_spks, raw_spks)`` where ``hps`` comes
        from ``utils.get_hparams_from_file``, ``net`` is the
        ``SynthesizerTrn`` after ``load_checkpoint`` and ``eval()``,
        ``raw_spks`` is ``hps.speakers`` (or generated ``"Character N"``
        names when that attribute is absent) and ``display_spks`` holds the
        same names passed through ``to_romaji``. Returns ``None`` when
        ``config.json`` is missing or when loading raises; in the latter
        case the error is printed.
    """
    key = str(m_id)
    try:
        return loaded_models[key]
    except KeyError:
        pass  # not cached yet, load it below

    try:
        model_dir = os.path.join(LOCAL_ROOT, key)
        config_path = os.path.join(model_dir, "config.json")
        if not os.path.exists(config_path):
            return None

        hps = utils.get_hparams_from_file(config_path)

        # Extra constructor keyword arguments come from the config's "model"
        # section, with all keys forced to strings.
        model_params = {}
        if hasattr(hps, 'model'):
            model_section = hps.model
            if hasattr(model_section, '__dict__'):
                raw_params = model_section.__dict__
            else:
                raw_params = dict(model_section)
            model_params = clean_config(raw_params)

        net = SynthesizerTrn(
            len(hps.symbols),
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            n_speakers=hps.data.n_speakers,
            **model_params
        )

        checkpoint_path = os.path.join(model_dir, "model.pth")
        utils.load_checkpoint(checkpoint_path, net, None)
        net.eval()

        if hasattr(hps, 'speakers'):
            raw_spks = hps.speakers
        else:
            raw_spks = [f"Character {i}" for i in range(hps.data.n_speakers)]
        display_spks = [to_romaji(name) for name in raw_spks]

        entry = (hps, net, display_spks, raw_spks)
        loaded_models[key] = entry
        return entry
    except Exception as e:
        print(f"Error loading model {m_id}: {str(e)}")
        return None
|
|
def tts_execute(m_id, text, speaker_display, speed):
    """Synthesise ``text`` with one speaker of model ``m_id``.

    Args:
        m_id: Model identifier, forwarded to ``get_vits_model``.
        text: Text to convert to speech.
        speaker_display: Speaker name as it appears in the model's display
            speaker list; its index in that list is used as the speaker id.
        speed: Speed factor; inference runs with
            ``length_scale = 1.0 / speed``.

    Returns:
        ``(sampling_rate, samples)`` with ``samples`` as an ``int16`` NumPy
        array, or ``None`` when the model cannot be loaded or any step of
        the synthesis raises (the error is printed).
    """
    data = get_vits_model(m_id)
    if not data:
        return None
    hps, net, display_spks, _ = data
    try:
        sid = display_spks.index(speaker_display)
        from text import text_to_sequence
        cleaners = hps.data.text_cleaners if hasattr(hps.data, 'text_cleaners') else ['japanese_cleaners']
        seq = text_to_sequence(text, hps.symbols, cleaners)
        if hps.data.add_blank:
            seq = commons.intersperse(seq, 0)
        x = torch.LongTensor(seq).unsqueeze(0)
        x_len = torch.LongTensor([len(seq)])
        with torch.no_grad():
            audio = net.infer(x, x_len, sid=torch.LongTensor([sid]), noise_scale=0.667,
                              noise_scale_w=0.8, length_scale=1.0/speed)[0][0, 0].data.cpu().float().numpy()

        # Peak-normalise to the int16 range. A silent result has a peak of 0;
        # dividing by it would give NaN samples and an undefined int16 cast,
        # so normalisation is skipped and the samples stay at zero.
        peak = np.abs(audio).max()
        if peak > 0:
            audio = audio / peak
        samples = (audio * 32767).astype(np.int16)
        return (hps.data.sampling_rate, samples)
    except Exception as e:
        print(f"TTS error: {str(e)}")
        return None
|
|
| |
# Stylesheet handed to gr.Blocks(css=...); the class names below are the ones
# referenced through elem_classes / inline HTML when the UI is built.
css = """
.gradio-container { max-width: 850px !important; margin: 0 auto !important; padding: 10px !important; }
.header-box {
background: white; border-radius: 15px; padding: 25px !important;
margin-bottom: 20px !important; border-top: 6px solid #5f6caf;
text-align: center; box-shadow: 0 2px 10px rgba(0,0,0,0.05);
}
.header-box h1 { font-size: 24px !important; margin: 0; }
.tabs-wrapper {
border: 1px dashed #cbd5e0 !important; border-radius: 15px !important;
padding: 15px !important; background: white; margin-bottom: 20px;
}
.content-area {
background: white; border-radius: 15px !important; padding: 20px !important;
border: 1px solid #eee !important; width: 100% !important;
}
.model-title { font-size: 18px !important; font-weight: bold; margin-bottom: 10px; color: #333; }
.footer-box {
background: white; border-radius: 12px; padding: 20px !important;
margin-top: 20px !important; text-align: center !important; border: 1px solid #eee;
}
footer { display: none !important; }
"""
|
|
# Build the Gradio UI: a header, one tab per model listed in info.json whose
# folder exists under LOCAL_ROOT, and a footer. When info.json is missing only
# a notice is shown.
# NOTE(review): the block nesting below was reconstructed from a source whose
# indentation was lost - confirm the layout (title rendered regardless of the
# cover image, button/audio placed inside the right-hand column, footer inside
# the info.json branch).
with gr.Blocks(css=css, title="MOE-TTS", theme=gr.themes.Soft()) as demo:
    with gr.Column(elem_classes="header-box"):
        gr.Markdown("# Library Anime TTS\n### LIBRARY ANIME PREMIUM")

    info_path = os.path.join(LOCAL_ROOT, "info.json")
    if os.path.exists(info_path):
        with open(info_path, "r", encoding="utf-8") as info_file:
            all_info = json.load(info_file)

        with gr.Column(elem_classes="tabs-wrapper"):
            # Tabs appear in numeric order of the model ids.
            sorted_keys = sorted(all_info.keys(), key=int)
            with gr.Tabs():
                for m_id in sorted_keys:
                    info = all_info[m_id]
                    m_path = os.path.join(LOCAL_ROOT, str(m_id))
                    if not os.path.exists(m_path):
                        # Model listed in info.json but not present on disk.
                        continue

                    with gr.Tab(f"Model {m_id}"):
                        m_res = get_vits_model(m_id)
                        if m_res is None:
                            spks = ["Error Loading Model"]
                        else:
                            spks = m_res[2]

                        # First existing cover image, tried in this order.
                        cover_candidates = (
                            os.path.join(m_path, f"cover.{ext}")
                            for ext in ('jpg', 'png', 'jpeg', 'webp')
                        )
                        cover_path = next(
                            (path for path in cover_candidates if os.path.exists(path)),
                            None,
                        )

                        with gr.Column(elem_classes="content-area"):
                            with gr.Row():
                                with gr.Column(scale=1):
                                    if cover_path:
                                        gr.Image(cover_path, show_label=False)
                                    t_romaji = to_romaji(info.get('title', 'Model'))
                                    gr.Markdown(f"<div class='model-title'>{t_romaji}</div>")

                                with gr.Column(scale=2):
                                    in_txt = gr.TextArea(
                                        label="Text Input",
                                        value=info.get("example", ""),
                                        lines=5,
                                    )
                                    with gr.Row():
                                        default_spk = spks[0] if spks else None
                                        in_char = gr.Dropdown(choices=spks, value=default_spk, label="Character")
                                        in_speed = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Speed")

                                    gen_btn = gr.Button("Generate Voice", variant="primary")
                                    aud_out = gr.Audio(label="Result")

                                    # The model id travels with the click as a
                                    # per-tab State value.
                                    model_state = gr.State(str(m_id))
                                    gen_btn.click(
                                        fn=tts_execute,
                                        inputs=[model_state, in_txt, in_char, in_speed],
                                        outputs=aud_out,
                                        api_name=False,
                                    )

        with gr.Column(elem_classes="footer-box"):
            gr.Markdown("**CREATED BY PLANA-CHAN**\nLibrary Anime TTS")
    else:
        gr.Markdown("## info.json not found")


demo.launch(show_api=False)