| import os |
| import glob |
| import json |
| import traceback |
| import logging |
| import gradio as gr |
| import numpy as np |
| import librosa |
| import torch |
| import asyncio |
| import edge_tts |
| import sys |
| import io |
| import wave |
| import shutil |
| from datetime import datetime |
| from fairseq import checkpoint_utils |
| from fairseq.data.dictionary import Dictionary |
| from huggingface_hub import snapshot_download |
| from lib.infer_pack.models import ( |
| SynthesizerTrnMs256NSFsid, |
| SynthesizerTrnMs256NSFsid_nono, |
| SynthesizerTrnMs768NSFsid, |
| SynthesizerTrnMs768NSFsid_nono, |
| ) |
| from vc_infer_pipeline import VC |
| from config import Config |
|
|
# Shared runtime settings; the loaders in this file read `config.device` and
# `config.is_half` from this object.
config = Config()

# Raise the "numba" logger threshold so only WARNING and above are emitted.
_numba_logger = logging.getLogger("numba")
_numba_logger.setLevel(logging.WARNING)
|
|
| |
| |
# First-run bootstrap: a missing local "weights" folder is taken to mean that
# the model assets have not been downloaded yet.
if not os.path.exists("weights"):
    repo_id = "Plana-Archive/Anime-RCV"
    source_dir = "BanGDream-MyGO"

    # The message is built from the values actually passed to
    # snapshot_download so the log cannot name a different repository.
    print(f"Mendownload weights dan bahan model dari repo {repo_id}/{source_dir}...")
    snapshot_download(
        repo_id=repo_id,
        local_dir=".",
        allow_patterns=[
            f"{source_dir}/weights/*",
            f"{source_dir}/hubert_base.pt",
            f"{source_dir}/rmvpe.pt",
        ],
        repo_type="model",
    )

    # The requested files live under `source_dir` in the repository; hoist
    # them into the working directory, where the rest of this file looks for
    # "weights/", "hubert_base.pt" and "rmvpe.pt".
    if os.path.exists(source_dir):
        print(f"Menyusun ulang struktur folder dari {source_dir}...")
        for item in os.listdir(source_dir):
            src_path = os.path.join(source_dir, item)
            dst_path = os.path.join(".", item)
            # shutil.move() places the source *inside* an existing destination
            # directory, so a stale directory is removed first.
            if os.path.isdir(src_path) and os.path.exists(dst_path):
                shutil.rmtree(dst_path)
            shutil.move(src_path, dst_path)
        # Everything was moved out, so the now-empty folder can be dropped.
        os.rmdir(source_dir)
        print("Struktur folder berhasil diperbarui.")
|
|
# Read by _load_audio_input: while truthy, input text longer than 100
# characters is rejected.
spaces = True

# Pitch-extraction methods offered by the UI radio button. "rmvpe" is only
# listed when its weights file exists in the working directory.
f0method_mode = ["pm", "harvest"]
if os.path.isfile("rmvpe.pt"):
    f0method_mode.append("rmvpe")
|
|
def _format_tts_rate(speed):
    """Convert a speed multiplier into a signed percentage string.

    ``1.0`` becomes ``"+0%"``, ``1.2`` becomes ``"+20%"`` and ``0.5`` becomes
    ``"-50%"``. The offset is rounded instead of truncated because binary
    floating point makes ``(1.2 - 1.0) * 100`` evaluate to
    ``19.999999999999996``, which truncation would turn into 19.
    """
    percent = int(round((speed - 1.0) * 100))
    return f"{percent:+d}%"


def _load_audio_input(tts_text, speed, spaces_limit=20):
    """Synthesize ``tts_text`` with edge-tts and load the result as audio.

    Args:
        tts_text: Text to speak. ``None``, empty and whitespace-only values
            are rejected.
        speed: Speed multiplier, where ``1.0`` is the unmodified rate.
        spaces_limit: Currently unused; the length limit is the hard-coded
            100 characters below. Kept so existing callers keep working.

    Returns:
        A 3-tuple. On rejection it is ``(None, None, "EMPTY")`` or
        ``(None, None, "TOO_LONG")``. On success it is
        ``(audio, sample_rate, path)``: the waveform and rate returned by
        ``librosa.load`` (mono, 16 kHz requested) and the path of the
        generated file, ``"tts.mp3"``.
    """
    temp_file = "tts.mp3"
    if not tts_text or not tts_text.strip():
        return None, None, "EMPTY"
    # The length cap only applies while the module-level `spaces` flag is set.
    if len(tts_text) > 100 and spaces:
        return None, None, "TOO_LONG"

    tts_voice_default = "ja-JP-NanamiNeural"
    communicate = edge_tts.Communicate(
        tts_text,
        tts_voice_default,
        rate=_format_tts_rate(speed),
    )
    # The save() coroutine is driven to completion on a fresh event loop.
    asyncio.run(communicate.save(temp_file))

    audio, sr = librosa.load(temp_file, sr=16000, mono=True)
    return audio, sr, temp_file
|
|
def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
    """Build the event handler that speaks text with one voice model.

    Args:
        model_name: Label shown in the progress log.
        tgt_sr: Sample rate reported alongside the converted audio.
        net_g: Synthesizer network handed to ``vc.pipeline``.
        vc: Object whose ``pipeline`` method performs the conversion.
        if_f0: Forwarded to ``vc.pipeline``.
        version: Forwarded to ``vc.pipeline``.
        file_index: Forwarded to ``vc.pipeline``.

    Returns:
        A generator function. Each item it yields is a ``(log_text, audio)``
        pair: ``audio`` is ``None`` for progress and error updates and
        ``(tgt_sr, audio_opt)`` for the final result.
    """

    def vc_fn(
        tts_text,
        f0_up_key, f0_method, index_rate, filter_radius,
        resample_sr, rms_mix_rate, protect, speed_rate,
    ):
        logs = []
        temp_audio_file = "tts.mp3"
        try:
            # On success `status` is the path of the synthesized file.
            audio, _sr, status = _load_audio_input(tts_text, speed_rate)

            # This function is a generator, so a value passed to `return` is
            # discarded by the consumer. User-facing messages must be yielded.
            if status == "EMPTY":
                yield "⚠️ Mohon masukkan teks terlebih dahulu!", None
                return
            if status == "TOO_LONG":
                yield "❌ Teks terlalu panjang! Maksimal 100 karakter.", None
                return

            logs.append(f"✨ Model: {model_name}")
            yield "\n".join(logs), None
            logs.append("📥 Memuat audio dasar...")
            logs.append(f"⚙️ Memproses RVC (Pitch: {f0_up_key})...")
            yield "\n".join(logs), None

            times = [0, 0, 0]
            # `hubert_model` is the module-level global set by load_hubert().
            audio_opt = vc.pipeline(
                hubert_model, net_g, 0, audio, status,
                times, f0_up_key, f0_method, file_index, index_rate,
                if_f0, filter_radius, tgt_sr, resample_sr,
                rms_mix_rate, version, protect, f0_file=None,
            )
            logs.append(f"✅ Selesai pada: {datetime.now().strftime('%H:%M:%S')}")
            yield "\n".join(logs), (tgt_sr, audio_opt)
        except Exception as e:
            # Boundary handler: log the traceback server-side and show a short
            # message in the UI instead of failing the event.
            traceback.print_exc()
            yield f"❌ Error: {str(e)}", None
        finally:
            # Always remove the intermediate TTS file, even on failure.
            if os.path.exists(temp_audio_file):
                os.remove(temp_audio_file)

    return vc_fn
|
|
def _build_synthesizer(cpt, version, if_f0):
    """Instantiate the synthesizer class matching a checkpoint.

    Returns ``None`` when ``version`` is neither ``"v1"`` nor ``"v2"``.
    """
    if version == "v1":
        with_f0, without_f0 = SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
    elif version == "v2":
        with_f0, without_f0 = SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono
    else:
        return None
    if if_f0 == 1:
        return with_f0(*cpt["config"], is_half=config.is_half)
    return without_f0(*cpt["config"])


def load_model():
    """Load every enabled voice model described under ``weights/``.

    ``weights/folder_info.json`` lists the categories; each category folder
    holds a ``model_info.json`` listing its characters. Entries whose
    ``enable`` flag is false, categories without a ``model_info.json``,
    models whose checkpoint file is missing and checkpoints with an
    unsupported ``version`` are skipped.

    Returns:
        A list of ``[title, folder, description, models]`` entries, one per
        loaded category. ``models`` is a list of tuples
        ``(character_name, title, author, cover_path, version, vc_fn)``.
        ``cover_path`` is ``None`` when no cover is configured. The list is
        empty when ``weights/folder_info.json`` does not exist.
    """
    categories = []
    folder_info_path = "weights/folder_info.json"
    if not os.path.isfile(folder_info_path):
        return categories

    with open(folder_info_path, "r", encoding="utf-8") as f:
        folder_info = json.load(f)

    for category_info in folder_info.values():
        if not category_info.get('enable', True):
            continue
        category_title = category_info['title']
        category_folder = category_info['folder_path']

        model_info_path = os.path.join("weights", category_folder, "model_info.json")
        if not os.path.exists(model_info_path):
            continue
        with open(model_info_path, "r", encoding="utf-8") as f:
            models_info = json.load(f)

        models = []
        for character_name, info in models_info.items():
            if not info.get('enable', True):
                continue

            base_character_path = os.path.join("weights", category_folder, character_name)
            cpt_path = os.path.join(base_character_path, info['model_path'])
            if not os.path.exists(cpt_path):
                continue

            # Joining an empty cover name would yield the character directory
            # itself, which is truthy and renders as a broken image.
            cover_name = info.get('cover')
            model_cover = os.path.join(base_character_path, cover_name) if cover_name else None
            model_index = os.path.join(base_character_path, info['feature_retrieval_library'])

            cpt = torch.load(cpt_path, map_location="cpu")
            tgt_sr = cpt["config"][-1]
            if_f0 = cpt.get("f0", 1)
            version = cpt.get("version", "v1")

            net_g = _build_synthesizer(cpt, version, if_f0)
            if net_g is None:
                # Without this guard `net_g` would be unbound, or would still
                # refer to the previous model's network.
                print(f"Melewati model {character_name}: versi '{version}' tidak didukung.")
                continue

            # `enc_q` is dropped before the weights are loaded, hence the
            # non-strict load.
            if hasattr(net_g, "enc_q"):
                del net_g.enc_q
            net_g.load_state_dict(cpt["weight"], strict=False)
            net_g.eval().to(config.device)
            net_g = net_g.half() if config.is_half else net_g.float()
            vc = VC(tgt_sr, config)

            models.append((
                character_name,
                info['title'],
                info.get("author"),
                model_cover,
                version,
                create_vc_fn(info['model_path'], tgt_sr, net_g, vc, if_f0, version, model_index),
            ))

        categories.append([category_title, category_folder, category_info.get('description', ''), models])
    return categories
|
|
def load_hubert():
    """Load ``hubert_base.pt`` into the module-level ``hubert_model``.

    The model is moved to ``config.device``, cast to half precision when
    ``config.is_half`` is set (single precision otherwise) and switched to
    eval mode.
    """
    global hubert_model
    # Register fairseq's Dictionary as a safe global with torch's serializer
    # before the checkpoint is read.
    torch.serialization.add_safe_globals([Dictionary])
    ensemble, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    hubert_model = ensemble[0].to(config.device)
    if config.is_half:
        hubert_model = hubert_model.half()
    else:
        hubert_model = hubert_model.float()
    hubert_model.eval()
|
|
# Script entry point: load the models, build the Gradio UI and serve it.
#
# NOTE(review): this file had lost its indentation; the nesting below was
# reconstructed from syntax. Confirm where the info panels, the speed slider,
# the log/output widgets and the launch call sit relative to the layout
# containers.
# NOTE(review): several string literals below contain mis-encoded characters
# (e.g. "β’", "πΈ"); they look like UTF-8 text decoded with the wrong code
# page. Restore the intended symbols from a clean copy of the file.
if __name__ == '__main__':
    # load_hubert() sets the global `hubert_model` that every vc_fn reads.
    load_hubert()
    categories = load_model()
    # Each category entry is [title, folder, description, models]; index 3 is
    # the list of loaded models.
    total_characters = sum(len(cat[3]) for cat in categories)

    # Page-level CSS; `.primary-btn` is attached to the generate button via
    # elem_classes below.
    custom_css = """
    .gradio-container { background-color: #ffffff !important; }
    .tabs { background-color: #ffffff !important; border-radius: 12px; border: 1px solid #d1f2d1 !important; }
    .primary-btn { background-color: #a8e6cf !important; border: none !important; color: white !important; font-weight: bold !important; }
    .primary-btn:hover { background-color: #89d9bb !important; }
    """

    with gr.Blocks(theme=gr.themes.Soft(primary_hue="green", secondary_hue="emerald"), css=custom_css) as app:
        # Header banner plus a status strip showing the number of loaded models.
        gr.HTML(f"""
        <div style="font-family: 'Arial', sans-serif; max-width: 800px; margin: 20px auto 10px auto; border: 1px solid #d1f2d1; border-radius: 12px; padding: 15px; background-color: #ffffff; text-align: center;">
        <h1 style="color: #2d6a4f; margin: 0;">BanG Dream! RVC</h1>
        <p style="color: #52b788; font-size: 14px; margin-top: 5px;">RVC-BANG-DREAM β’ Weights by Plana-Archive</p>
        </div>

        <div style="font-family: 'Arial', sans-serif; max-width: 800px; margin: 0 auto 20px auto; border: 1px solid #d1f2d1; border-radius: 10px; padding: 15px; background-color: white; display: flex; justify-content: space-around; align-items: center;">
        <div style="text-align: center;">
        <p style="color: #94a3b8; font-size: 11px; font-weight: 700; margin: 0; text-transform: uppercase;">System Status</p>
        <p style="color: #22c55e; font-size: 14px; font-weight: 700; margin: 0;">β ONLINE</p>
        </div>
        <div style="height: 30px; border-left: 1px solid #f1f5f9;"></div>
        <div style="text-align: center;">
        <p style="color: #94a3b8; font-size: 11px; font-weight: 700; margin: 0; text-transform: uppercase;">Total Characters</p>
        <p style="color: #1e293b; font-size: 14px; font-weight: 700; margin: 0;">{total_characters} Models</p>
        </div>
        </div>
        """)

        # One tab per category, with one nested tab per character model.
        # `folder` and `description` are unpacked but not used here.
        for (folder_title, folder, description, models) in categories:
            with gr.TabItem(folder_title):
                with gr.Tabs():
                    # `author` and `model_version` are unpacked but not used here.
                    for (name, title, author, cover, model_version, vc_fn) in models:
                        with gr.TabItem(name):
                            with gr.Row():
                                # Character title and, when a cover path is set, its image.
                                gr.Markdown(f'<div align="center"><h3 style="color: #1b4332;">{title}</h3>' + (f'<img style="width:auto;height:250px;border-radius:10px;border: 3px solid #a8e6cf;" src="file/{cover}">' if cover else "") + '</div>')
                            with gr.Row():
                                with gr.Column():
                                    tts_text = gr.Textbox(label="π·οΈ MASUK TEXT SINI", info="Masukkan teks yang ingin diucapkan", lines=3)
                                    vc_pitch = gr.Slider(minimum=-12, maximum=12, label="Pitch (Nada)", value=12, step=1, info="Diset ke +12 untuk karakter perempuan")

                                with gr.Column():
                                    # Defaults to "rmvpe" when that method is available, otherwise "pm".
                                    f0method0 = gr.Radio(label="Algoritma Pitch", choices=f0method_mode, value="rmvpe" if "rmvpe" in f0method_mode else "pm")
                                    index_rate1 = gr.Slider(minimum=0, maximum=1, label="Rasio Retrieval", value=0.75)
                                    filter_radius0 = gr.Slider(minimum=0, maximum=7, label="Median Filtering", value=7, step=1)

                                with gr.Column():
                                    resample_sr0 = gr.Slider(minimum=0, maximum=48000, label="Resample Rate", value=0, step=1)
                                    rms_mix_rate0 = gr.Slider(minimum=0, maximum=1, label="Volume Envelope", value=0.76)
                                    protect0 = gr.Slider(minimum=0, maximum=0.5, label="Proteksi Suara", value=0.33, step=0.01)

                            # Static help panel describing each control.
                            gr.HTML("""<div style="font-family: 'Arial', sans-serif; border: 1px solid #bae6fd; border-radius: 10px; padding: 15px; background-color: #f0f9ff; margin-bottom: 10px; border-left: 5px solid #0ea5e9;"><h4 style="color: #0369a1; font-size: 14px; font-weight: 700; margin: 0 0 8px 0;">π Notes & Panduan Fitur π</h4><ul style="color: #075985; font-size: 12px; margin: 0; padding-left: 18px; line-height: 1.5;"><li><b>Algoritma Pitch:</b> Akurasi nada (RMVPE terbaik).</li><li><b>Rasio Retrieval:</b> Kemiripan karakter asli (0.7+).</li><li><b>Median Filtering:</b> Menghilangkan suara kresek/noise.</li><li><b>Resample Rate:</b> Kejernihan audio (0 otomatis).</li><li><b>Volume Envelope:</b> Keseimbangan volume suara.</li><li><b>Proteksi Suara:</b> Melindungi suara alami manusia.</li></ul></div>""")

                            # Static panel listing the recommended settings; the
                            # values match the widget defaults above.
                            gr.HTML("""<div style="font-family: 'Arial', sans-serif; border: 1px solid #dcfce7; border-radius: 10px; padding: 15px; background-color: #f0fdf4; margin-bottom: 10px; border-left: 5px solid #22c55e;"><h4 style="color: #166534; font-size: 14px; font-weight: 700; margin: 0 0 8px 0;">π DI SARANKAN π</h4><ul style="color: #166534; font-size: 11px; margin: 0; padding-left: 18px; line-height: 1.6;"><li><b>Algoritma Pitch:</b> Selalu gunakan <b>RMVPE</b> untuk kejernihan maksimal.</li><li><b>Rasio Retrieval:</b> Set di angka <b>0.75</b> untuk kemiripan karakter.</li><li><b>Median Filtering:</b> Gunakan angka <b>7</b> untuk suara paling bersih.</li><li><b>Resample Rate:</b> Set ke <b>0</b> (Otomatis) agar tidak pecah.</li><li><b>Volume Envelope:</b> Gunakan <b>0.76</b> untuk kestabilan suara.</li><li><b>Proteksi Suara:</b> Set ke <b>0.33</b> agar hasil tidak kaku/robotik.</li><li><b>Pitch:</b> Naikkan ke <b>+12</b> khusus untuk karakter perempuan.</li></ul></div>""")

                            # Speech-speed multiplier, passed to vc_fn as its last input.
                            speed_rate = gr.Slider(minimum=0.5, maximum=2.0, label="Kecepatan Suara", value=1.0, step=0.1)

                            gr.HTML("""<div style="margin-bottom: -15px;"><span style="color: #40916c; font-weight: 700; font-size: 13px;">π₯οΈ LOG SISTEM</span></div>""")
                            vc_log = gr.Textbox(label="", placeholder="Menunggu proses...", interactive=False)
                            vc_output = gr.Audio(label="Audio Hasil", interactive=False)
                            vc_convert = gr.Button("πΈ GENERATE VOICE πΈ", variant="primary", elem_classes="primary-btn")

                            gr.HTML("""
                            <div style="font-family: 'Arial', sans-serif; border: 1px solid #fecaca; border-radius: 10px; padding: 15px; background-color: #fef2f2; margin-top: 15px; border-left: 5px solid #ef4444;">
                            <h4 style="color: #991b1b; font-size: 14px; font-weight: 700; margin: 0 0 5px 0;">PERINGATAN MINNA π</h4>
                            <p style="color: #b91c1c; font-size: 12px; margin: 0; line-height: 1.5;">
                            Setelah di Generate Voice, audionya akan muncul beberapa detik dan tunggu aja ya!
                            </p>
                            </div>
                            """)

                            # The input order must match vc_fn's positional
                            # parameters; its yielded pairs fill the log box
                            # and the audio player.
                            vc_convert.click(
                                fn=vc_fn,
                                inputs=[tts_text, vc_pitch, f0method0, index_rate1, filter_radius0, resample_sr0, rms_mix_rate0, protect0, speed_rate],
                                outputs=[vc_log, vc_output]
                            )

        # Page footer.
        gr.HTML("""<div style="font-family: 'Arial', sans-serif; max-width: 800px; margin: 30px auto 20px auto; border: 1px solid #d1f2d1; border-radius: 12px; padding: 20px; background-color: white; text-align: center;"><h3 style="color: #1b4332; font-size: 16px; margin: 0; font-weight: 700;">CREATED BY PLANA-CHAN</h3><p style="color: #94a3b8; font-size: 13px; margin-top: 4px;">BanG Dream! RVC Implementation</p></div>""")

    # Serve on all interfaces at port 7860 with a request queue of at most 20.
    app.queue(max_size=20).launch(share=False, server_name="0.0.0.0", server_port=7860)