"""Gradio voice-conversion app serving RVC models for BanG Dream! MyGO!!!!!.

At import time the script optionally downloads the model weights from the
Hugging Face Hub; when run as a script it loads HuBERT plus every enabled
model and launches the Gradio UI.
"""
import asyncio
import json
import logging
import os
import re
import shutil
import time
import traceback
from datetime import datetime

import edge_tts
import gradio as gr
import librosa
import numpy as np
import torch
from fairseq import checkpoint_utils
from fairseq.data.dictionary import Dictionary

from lib.infer_pack.models import (
    SynthesizerTrnMs256NSFsid,
    SynthesizerTrnMs256NSFsid_nono,
    SynthesizerTrnMs768NSFsid,
    SynthesizerTrnMs768NSFsid_nono,
)
from vc_infer_pipeline import VC
from config import Config

# =============================
# LOAD ENVIRONMENT VARIABLES
# =============================
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    print("šŸ”‘ Hugging Face token detected")
    os.environ["HUGGINGFACE_TOKEN"] = HF_TOKEN
else:
    print("āš ļø No HF_TOKEN found")

# Name of the model folder, both in the Hub repo and under weights/.
MYGO_FOLDER = "BanGDream-MyGO"

# Written to weights/folder_info.json when that file does not exist yet.
DEFAULT_FOLDER_INFO = {
    "BanGDream-MyGO": {
        "title": "BanG Dream! MyGO!!!!!",
        "folder_path": "BanGDream-MyGO",
        "description": "Official RVC Weights for BanG Dream! MyGO!!!!! characters",
        "enable": True,
    }
}


def _write_default_folder_info(path):
    """Write DEFAULT_FOLDER_INFO as UTF-8 JSON to *path*."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(DEFAULT_FOLDER_INFO, f, indent=2, ensure_ascii=False)


# =============================
# AUTOMATIC WEIGHTS DOWNLOAD
# =============================
def _download_weights():
    """Download the model files from the Hub and move them into the CWD.

    Best effort: any failure is printed together with manual setup
    instructions and the script continues.
    """
    print("=" * 50)
    print("šŸš€ BANGO DREAM MYGO VOICE CONVERSION")
    print("=" * 50)
    print("šŸ“„ Mendownload weights dan bahan model dari repo Plana-RCV/BanGDream-MyGO...")
    try:
        from huggingface_hub import snapshot_download

        repo_id = "Plana-Archive/Premium-Model"
        print(f"šŸ“„ Downloading from: {repo_id}")
        print("šŸ“ Looking for: BanGDream-MyGO")
        snapshot_download(
            repo_id=repo_id,
            allow_patterns=[
                "BanGDream-MyGO/weights/**",
                "BanGDream-MyGO/hubert_base.pt",
                "BanGDream-MyGO/rmvpe.pt",
            ],
            local_dir=".",
            local_dir_use_symlinks=False,
            token=HF_TOKEN or None,
            max_workers=2,
        )
        print("āœ… Download completed")

        source_dir = MYGO_FOLDER
        if not os.path.exists(source_dir):
            print("āŒ Source directory not found after download!")
            return

        print(f"šŸ“‚ Moving files from: {source_dir}")
        for item in os.listdir(source_dir):
            src = os.path.join(source_dir, item)
            dst = os.path.join(".", item)
            # A stale directory from an earlier run is replaced wholesale.
            if os.path.isdir(src) and os.path.exists(dst):
                shutil.rmtree(dst)
            shutil.move(src, dst)
        shutil.rmtree(source_dir)
        print("āœ… Files moved successfully")

        folder_info_path = os.path.join("weights", "folder_info.json")
        if not os.path.exists(folder_info_path):
            _write_default_folder_info(folder_info_path)
            print("šŸ“„ Created folder_info.json")
    except Exception as e:
        # Top-level boundary: report and keep going so the UI can still start.
        print(f"āš ļø Download failed: {e}")
        traceback.print_exc()
        print("\nšŸ“ Manual setup:")
        print("1. Create folder: weights/")
        print("2. Download from: https://huggingface.co/Plana-Archive/Plana-RCV/tree/main/BanGDream-MyGO")
        print("3. Put BanGDream-MyGO folder in weights/")


if not os.path.exists("weights"):
    _download_weights()

config = Config()
logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("fairseq").setLevel(logging.WARNING)

# cache_key -> (tgt_sr, net_g, vc, if_f0, version, model_index)
model_cache = {}
hubert_loaded = False
hubert_model = None
spaces = True

if spaces:
    audio_mode = ["Upload audio", "TTS Audio"]
else:
    audio_mode = ["Input path", "Upload audio", "TTS Audio"]

f0method_mode = ["pm", "harvest"]
if os.path.isfile("rmvpe.pt"):
    f0method_mode.insert(2, "rmvpe")

_TITLE_PREFIX_RE = re.compile(r'^BanG Dream[!]?\s*MyGO[!]*\s*-\s*', re.IGNORECASE)
_TITLE_EPOCHS_RE = re.compile(r'\s*-\s*\d+\s*epochs', re.IGNORECASE)


def clean_title(title):
    """Strip the leading "BanG Dream MyGO - " and any "- N epochs" from *title*."""
    return _TITLE_EPOCHS_RE.sub('', _TITLE_PREFIX_RE.sub('', title))


def _load_audio_input(vc_audio_mode, vc_input, vc_upload, tts_text, spaces_limit=20):
    """Load the source audio for the selected input mode.

    Args:
        vc_audio_mode: "Input path", "Upload audio" or "TTS Audio".
        vc_input: File path, used in "Input path" mode.
        vc_upload: ``(sample_rate, ndarray)`` tuple, used in "Upload audio" mode.
        tts_text: Text to synthesise, used in "TTS Audio" mode.
        spaces_limit: Unused; kept for interface compatibility.

    Returns:
        ``(audio, 16000, temp_file)`` where *audio* is mono float32 at 16 kHz
        and *temp_file* is the TTS file the caller must delete (else None).

    Raises:
        ValueError: Missing input, unknown mode, or TTS timeout.
    """
    temp_file = None
    try:
        if vc_audio_mode == "Input path" and vc_input:
            audio, _ = librosa.load(vc_input, sr=16000, mono=True)
            return audio.astype(np.float32), 16000, None

        if vc_audio_mode == "Upload audio":
            if vc_upload is None:
                raise ValueError("Mohon upload file audio terlebih dahulu!")
            sampling_rate, audio = vc_upload
            if np.issubdtype(audio.dtype, np.integer):
                # Integer PCM: scale into [-1, 1]. np.iinfo rejects float
                # dtypes, so float input must not take this path.
                audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
            else:
                audio = audio.astype(np.float32)
            if audio.ndim > 1:
                # Multi-channel uploads are (samples, channels): average the
                # channel axis, not the sample axis.
                audio = audio.mean(axis=1)
            if sampling_rate != 16000:
                audio = librosa.resample(
                    audio, orig_sr=sampling_rate, target_sr=16000, res_type='kaiser_fast'
                )
            return audio.astype(np.float32), 16000, None

        if vc_audio_mode == "TTS Audio":
            if not tts_text or tts_text.strip() == "":
                raise ValueError("Mohon masukkan teks untuk TTS!")
            temp_file = "tts_temp.wav"

            async def tts_task():
                return await edge_tts.Communicate(tts_text, "ja-JP-NanamiNeural").save(temp_file)

            try:
                asyncio.run(asyncio.wait_for(tts_task(), timeout=10))
            except asyncio.TimeoutError as exc:
                raise ValueError("TTS timeout! Silakan coba lagi.") from exc
            audio, _ = librosa.load(temp_file, sr=16000, mono=True)
            return audio.astype(np.float32), 16000, temp_file
    except Exception:
        # Do not leave a partial TTS file behind on failure.
        if temp_file and os.path.exists(temp_file):
            os.remove(temp_file)
        raise
    raise ValueError("Invalid audio mode or missing input.")


def adjust_audio_speed(audio, speed):
    """Time-stretch *audio* by *speed*; 1.0 returns the input unchanged."""
    if speed == 1.0:
        return audio
    return librosa.effects.time_stretch(audio.astype(np.float32), rate=speed)


def preprocess_audio(audio):
    """Scale *audio* to a 0.9 peak if it exceeds 1.0; return it as float32."""
    peak = np.max(np.abs(audio))
    if peak > 1.0:
        audio = audio / peak * 0.9
    return audio.astype(np.float32)


def create_vc_fn(model_key, tgt_sr, net_g, vc, if_f0, version, file_index):
    """Build the Gradio click handler for one voice model.

    Args:
        model_key: Key of this model in ``model_cache``.
        tgt_sr: Sample rate reported alongside the converted audio.
        net_g: Synthesizer network for the model.
        vc: ``VC`` pipeline instance.
        if_f0: f0 flag read from the checkpoint.
        version: Model version string read from the checkpoint.
        file_index: Path to the feature-retrieval index file.

    Returns:
        A generator function yielding ``(status_text, audio_or_None)``.
    """

    def vc_fn(
        vc_audio_mode,
        vc_input,
        vc_upload,
        tts_text,
        f0_up_key,
        f0_method,
        index_rate,
        filter_radius,
        resample_sr,
        rms_mix_rate,
        protect,
        speed,
    ):
        temp_audio_file = None
        times = [0, 0, 0]

        def run_pipeline(samples, audio_label):
            # The tensor is created per call so the chunked path never holds
            # the whole clip on the device as well.
            tensor = torch.FloatTensor(samples).to(config.device)
            return vc.pipeline(
                hubert_model,
                net_g,
                0,
                tensor,
                audio_label,
                times,
                int(f0_up_key),
                f0_method,
                file_index,
                index_rate,
                if_f0,
                filter_radius,
                tgt_sr,
                resample_sr,
                rms_mix_rate,
                version,
                protect,
                f0_file=None,
            )

        try:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            net_g.to(config.device)
            yield "Status: šŸš€ Memproses audio...", None

            audio, _, temp_audio_file = _load_audio_input(
                vc_audio_mode, vc_input, vc_upload, tts_text
            )
            audio = preprocess_audio(audio)

            max_chunk_size = 16000 * 30  # 30 s at 16 kHz
            if len(audio) > max_chunk_size:
                chunk_label = "chunk" if vc_input else "temp"
                audio_opt = np.concatenate([
                    run_pipeline(audio[start:start + max_chunk_size], chunk_label)
                    for start in range(0, len(audio), max_chunk_size)
                ])
            else:
                audio_opt = run_pipeline(audio, vc_input or "temp")

            audio_opt = audio_opt.astype(np.float32)
            if speed != 1.0:
                audio_opt = adjust_audio_speed(audio_opt, speed)
            peak = np.max(np.abs(audio_opt))
            if peak > 0:
                audio_opt = (audio_opt / peak * 0.9).astype(np.float32)
            # NOTE(review): tgt_sr is reported even when resample_sr is set;
            # confirm vc.pipeline does not return audio at resample_sr.
            yield "Status: āœ… Selesai!", (tgt_sr, audio_opt)
        except Exception as e:
            yield f"āŒ Error: {e}\n\n{traceback.format_exc()}", None
        finally:
            if temp_audio_file and os.path.exists(temp_audio_file):
                os.remove(temp_audio_file)
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # NOTE(review): load_model registers every model in model_cache
            # before building this closure, so this branch looks unreachable
            # and models stay on config.device after use. Confirm intent.
            if model_key not in model_cache:
                net_g.to('cpu')

    return vc_fn


def create_model_info_from_files(base_path, category_folder=MYGO_FOLDER):
    """Generate ``model_info.json`` for a category by scanning its folders.

    Each sub-directory containing at least one ``.pth`` file becomes an entry.

    Args:
        base_path: Weights root directory.
        category_folder: Category directory under *base_path*.

    Returns:
        The model-info dict that was written, or None when the category
        directory does not exist.
    """
    category_dir = os.path.join(base_path, category_folder)
    if not os.path.exists(category_dir):
        return None

    model_info = {}
    # Sorted so the chosen files do not depend on directory listing order.
    for char_folder in sorted(os.listdir(category_dir)):
        char_path = os.path.join(category_dir, char_folder)
        if not os.path.isdir(char_path):
            continue
        entries = sorted(os.listdir(char_path))
        pth_files = [f for f in entries if f.endswith('.pth')]
        if not pth_files:
            continue
        index_files = [f for f in entries if f.endswith('.index')]
        image_files = [f for f in entries if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        # "TakamatsuTomori" -> "Takamatsu Tomori"
        char_name_formatted = re.sub(r"([a-z])([A-Z])", r"\1 \2", char_folder)
        model_info[char_folder] = {
            "enable": True,
            "model_path": pth_files[0],
            "title": f"MyGO - {char_name_formatted}",
            "cover": image_files[0] if image_files else "cover.png",
            "feature_retrieval_library": index_files[0] if index_files else "",
            "author": "Plana-Archive",
        }

    model_info_path = os.path.join(category_dir, "model_info.json")
    with open(model_info_path, "w", encoding="utf-8") as f:
        json.dump(model_info, f, indent=2, ensure_ascii=False)
    print(f"āœ… Created model_info.json with {len(model_info)} characters")
    return model_info


def _load_synthesizer(model_path):
    """Load a checkpoint on CPU and return ``(tgt_sr, net_g, if_f0, version)``."""
    cpt = torch.load(model_path, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    # Speaker count is taken from the embedding table in the weights.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
    if_f0, version = cpt.get("f0", 1), cpt.get("version", "v1")
    if version == "v1":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    else:
        if if_f0 == 1:
            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    if hasattr(net_g, "enc_q"):
        del net_g.enc_q
    net_g.load_state_dict(cpt["weight"], strict=False)
    # NOTE(review): weights are never cast to half here although HuBERT is
    # when config.is_half is set; confirm the pipeline tolerates that.
    net_g.eval().to('cpu')
    return tgt_sr, net_g, if_f0, version


def load_model():
    """Load every enabled model listed under ``weights/``.

    Returns:
        A list of ``[title, folder, description, models]`` per category, where
        each model is ``(name, title, author, cover_path, version, vc_fn)``.
        Empty when ``weights/`` does not exist.
    """
    categories = []
    base_path = "weights"
    if not os.path.exists(base_path):
        print(f"āŒ Folder '{base_path}' not found!")
        return categories

    folder_info_path = f"{base_path}/folder_info.json"
    if not os.path.isfile(folder_info_path):
        _write_default_folder_info(folder_info_path)
    with open(folder_info_path, "r", encoding="utf-8") as f:
        folder_info = json.load(f)

    for category_info in folder_info.values():
        if not category_info.get('enable', True):
            continue
        category_title = category_info['title']
        category_folder = category_info['folder_path']
        description = category_info['description']

        model_info_path = f"{base_path}/{category_folder}/model_info.json"
        if not os.path.exists(model_info_path):
            # Scan this category's own folder, not a fixed one.
            if not create_model_info_from_files(base_path, category_folder):
                continue
        with open(model_info_path, "r", encoding="utf-8") as f:
            models_info = json.load(f)

        models = []
        for character_name, info in models_info.items():
            if not info.get('enable', True):
                continue
            model_title, model_name, model_author = (
                info['title'], info['model_path'], info.get("author")
            )
            character_dir = f"{base_path}/{category_folder}/{character_name}"
            cache_key = f"{category_folder}_{character_name}"
            if cache_key in model_cache:
                tgt_sr, net_g, vc, if_f0, version, model_index = model_cache[cache_key]
            else:
                model_index = f"{character_dir}/{info['feature_retrieval_library']}"
                tgt_sr, net_g, if_f0, version = _load_synthesizer(
                    f"{character_dir}/{model_name}"
                )
                vc = VC(tgt_sr, config)
                model_cache[cache_key] = (tgt_sr, net_g, vc, if_f0, version, model_index)
            models.append((
                character_name,
                model_title,
                model_author,
                f"{character_dir}/{info['cover']}",
                version,
                create_vc_fn(cache_key, tgt_sr, net_g, vc, if_f0, version, model_index),
            ))
        categories.append([category_title, category_folder, description, models])
    return categories


def load_hubert():
    """Load ``hubert_base.pt`` into the global ``hubert_model`` once."""
    global hubert_model, hubert_loaded
    if hubert_loaded:
        return
    torch.serialization.add_safe_globals([Dictionary])
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"], suffix="",
    )
    hubert_model = models[0].to(config.device)
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
    hubert_loaded = True


def change_audio_mode(vc_audio_mode):
    """Return visibility updates for (path box, upload widget, TTS box)."""
    is_input_path = vc_audio_mode == "Input path"
    is_upload = vc_audio_mode == "Upload audio"
    is_tts = vc_audio_mode == "TTS Audio"
    # gr.update is used instead of the per-component Component.update
    # classmethods, which are not present in every Gradio release.
    return (
        gr.update(visible=is_input_path),
        gr.update(visible=is_upload),
        gr.update(visible=is_tts, lines=4 if is_tts else 2),
    )


# CSS (same as before)
css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Quicksand:wght@400;600;700&display=swap');
body, .gradio-container { background-color: #ffffff !important; font-family: 'Inter', sans-serif !important; }
footer { display: none !important; }
.arona-loading-container { display: flex; align-items: center; justify-content: center; gap: 15px; margin-top: 15px; padding: 10px; }
.loading-text-blue { font-family: 'Quicksand', sans-serif; font-size: 20px; font-weight: 700; color: #00b0ff; letter-spacing: 1px; }
.loading-gif-small { width: 100px; height: auto; border-radius: 8px; }
.header-img-container { text-align: center; padding: 10px 0; background: #ffffff !important; }
.header-img { width: 100%; max-width: 500px; border-radius: 15px; margin: 0 auto; display: block; }
.status-card { background: #ffffff; border: 1px solid #e1f0ff; border-radius: 14px; padding: 15px 10px; margin: 0 auto 15px auto; max-width: 400px; display: flex; flex-direction: column; align-items: center; }
.status-online-box { display: flex; align-items: center; gap: 8px; margin-bottom: 12px; }
.status-details-container { display: flex; width: 100%; justify-content: center; align-items: center; border-top: 1px solid #f0f7ff; padding-top: 10px; }
.status-detail-item { flex: 1; display: flex; flex-direction: column; align-items: center; text-align: center; }
.status-detail-item:first-child { border-right: 1px solid #e1f0ff; }
.status-text-main { font-size: 13px !important; font-weight: 600; color: #546e7a; }
.status-text-sub { font-size: 11px !important; color: #90a4ae; }
.dot-online { height: 8px; width: 8px; background-color: #2ecc71; border-radius: 50%; display: inline-block; animation: blink-green 1.5s infinite; }
@keyframes blink-green { 0% { opacity: 1; } 50% { opacity: 0.4; } 100% { opacity: 1; } }
.gr-form .gr-block label span, .gr-box label span, .gr-panel label span { background: linear-gradient(135deg, #4fc3f7 0%, #00b0ff 100%) !important; color: white !important; padding: 4px 12px !important; border-radius: 8px !important; font-weight: 600 !important; box-shadow: 0 0 15px rgba(79, 195, 247, 0.4) !important; }
input[type="range"] { accent-color: #00b0ff !important; }
.char-scroll-box { display: grid !important; grid-template-columns: repeat(2, 1fr) !important; gap: 12px !important; max-height: 280px; overflow-y: auto; padding: 15px; background: #ffffff; border: 2px solid #eef5ff; border-radius: 14px; }
.char-card { background: white; padding: 12px; border-radius: 12px; cursor: pointer; border: 1px solid #e1f5fe; border-left: 5px solid #4fc3f7; transition: all 0.2s ease; display: flex; flex-direction: column; height: 65px; }
.char-name-jp { font-weight: 700; font-size: 11px !important; color: #455a64; }
.char-name-en { font-size: 8.5px !important; color: #90a4ae; text-transform: uppercase; }
.speed-section { margin-top: 20px; padding: 18px; border-radius: 20px; background: linear-gradient(135deg, #f0f7ff 0%, #ffffff 100%); border: 2px solid #e1f0ff; }
.speed-title { font-family: 'Quicksand', sans-serif; font-weight: 700; color: #4ea8de; text-align: center; margin-bottom: 12px; font-size: 14px; }
.generate-btn { font-family: 'Quicksand', sans-serif; font-weight: 700 !important; background: linear-gradient(135deg, #64b5f6 0%, #2196f3 100%) !important; color: white !important; border-radius: 12px !important; }
.footer-text { text-align: center; padding: 20px; border-top: 1px solid #f0f4f8; color: #b0bec5; font-size: 11px; }
.speed-notes-box { font-family: 'Arial'; border: 1px solid #ffd8b2; border-radius: 8px; padding: 12px; background: #fff7ed; border-left: 4px solid #fb923c; margin-top: 10px; }
.speed-notes-title { color: #c2410c; font-size: 12px; margin: 0 0 5px 0; font-weight: bold; }
.speed-notes-content { color: #9a3412; font-size: 11px; margin: 0; }
.video-demo-container { text-align: center; padding: 20px; background: #ffffff; border-radius: 20px; border: 2px solid #e1f0ff; margin: 20px auto; max-width: 800px; }
.video-demo-title { font-family: 'Quicksand', sans-serif; font-weight: 700; color: #4fc3f7; font-size: 18px; margin-bottom: 15px; }
.video-demo-player { width: 100%; border-radius: 15px; box-shadow: 0 10px 30px rgba(0, 176, 255, 0.2); }
"""

# Registers window.selectModel(cat, mod): clicks the category tab, then the
# model tab whose label matches.
_SELECT_MODEL_JS = """() => {
    window.selectModel = (cat, mod) => {
        const tabs = document.querySelectorAll('.tabs .tab-nav button');
        for (let t of tabs) {
            if (t.textContent.trim() === cat) {
                t.click();
                setTimeout(() => {
                    const mTabs = document.querySelectorAll('.tabs .tab-nav button');
                    for (let mt of mTabs) {
                        if (mt.textContent.trim() === mod) mt.click();
                    }
                }, 50);
                break;
            }
        }
    }
}"""


def _build_model_tab(tab_id, model, default_f0_method):
    """Render the tab for one model and wire its events.

    Args:
        tab_id: Gradio id for the tab.
        model: ``(name, title, author, cover, version, vc_fn)`` tuple.
        default_f0_method: Initially selected conversion algorithm.
    """
    name, title, author, cover, model_version, vc_fn = model
    with gr.TabItem(name, id=tab_id):
        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML(f"\n{clean_title(title)}\n{model_version} • {author}\n")
            with gr.Column(scale=2):
                with gr.Group():
                    vc_audio_mode = gr.Dropdown(
                        label="Input Mode", choices=audio_mode, value="TTS Audio"
                    )
                    vc_input = gr.Textbox(visible=False)
                    # Microphone checkbox removed - upload audio is used directly.
                    vc_upload = gr.Audio(label="Upload Audio Source", visible=False)
                    tts_text = gr.Textbox(
                        label="TTS Text", visible=True,
                        placeholder="Type message here...", lines=3,
                    )
                with gr.Row():
                    with gr.Column():
                        vc_transform0 = gr.Slider(
                            minimum=-12, maximum=12, label="Pitch (Nada)", value=12, step=1
                        )
                        f0method0 = gr.Radio(
                            label="Conversion Algorithm",
                            choices=f0method_mode,
                            value=default_f0_method,
                        )
                    with gr.Column():
                        with gr.Accordion("āš™ļø SETTINGS āš™ļø", open=False):
                            index_rate1 = gr.Slider(0, 1, label="Index Rate", value=0.75)
                            filter_radius0 = gr.Slider(0, 7, label="Filter", value=7, step=1)
                            resample_sr0 = gr.Slider(0, 48000, label="Resample", value=0)
                            rms_mix_rate0 = gr.Slider(0, 1, label="Volume Mix", value=0.76)
                            protect0 = gr.Slider(0, 0.5, label="Voice Protect", value=0.33)
                with gr.Column(elem_classes="speed-section"):
                    gr.HTML("\n⚔ KECEPATAN SUARA ⚔\n")
                    speed_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label=None)
                    # Wording corrected: a rate below 1.0 slows the voice
                    # down, and the suggested values are 0.8 / 0.9.
                    gr.HTML(
                        "\nšŸŒ„ļø CATATAN KECIL šŸŒ„ļø\n"
                        "• Kiri (0.5): untuk memperlambat Suara\n"
                        "• āœ… Tengah (1.0): untuk Kecepatan normal (disarankan)\n"
                        "• 🚫 Kanan (2.0): Mempercepat suara (tidak di sarankan)\n"
                        "\n"
                        "Tips: Atur ke kiri untuk suara lebih lambat dan atur ke kanan "
                        "untuk suara lebih cepat. Disarankan tetap di 1.0 untuk hasil "
                        "normal atau ubah jadi 0.8 atau 0.9.\n"
                    )
                gr.HTML("\nLet's Play Music!\n")
            with gr.Column(scale=1):
                vc_log = gr.Textbox(label="Process Logs", interactive=False)
                vc_output = gr.Audio(label="Result Audio", interactive=False)
                vc_convert = gr.Button(
                    "šŸŽø GENERATE VOICE šŸŽø", variant="primary", elem_classes="generate-btn"
                )
        vc_convert.click(
            fn=vc_fn,
            inputs=[
                vc_audio_mode, vc_input, vc_upload, tts_text, vc_transform0,
                f0method0, index_rate1, filter_radius0, resample_sr0,
                rms_mix_rate0, protect0, speed_slider,
            ],
            outputs=[vc_log, vc_output],
        )
        vc_audio_mode.change(
            fn=change_audio_mode,
            inputs=[vc_audio_mode],
            outputs=[vc_input, vc_upload, tts_text],
        )


def main():
    """Load HuBERT and all models, build the Gradio UI and launch it."""
    load_hubert()
    categories = load_model()
    total_models = sum(len(models) for _, _, _, models in categories)
    # "rmvpe" is only offered when rmvpe.pt exists, so it cannot be an
    # unconditional default.
    default_f0_method = "rmvpe" if "rmvpe" in f0method_mode else f0method_mode[0]

    # NOTE(review): the gr.HTML literals below carry text only. The CSS
    # classes above and window.selectModel are referenced nowhere in this
    # file, so the markup appears to have been stripped - restore it from
    # the original source. Layout nesting was reconstructed from statement
    # order and should be checked against the original as well.
    with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
        gr.HTML("\n")
        gr.HTML(
            "\nSystem Online\n"
            f"šŸŽø {total_models} MembersReady\n"
            f"šŸ“Š TotalDatabase: {total_models}\n"
        )
        with gr.Row():
            with gr.Column(scale=1):
                pass
            with gr.Column(scale=3):
                gr.HTML("\nāœ… [ON] MODE YURI šŸ’š\n")
            with gr.Column(scale=1):
                pass

        for cat_idx, (folder_title, folder, description, models) in enumerate(categories):
            with gr.TabItem(folder_title):
                with gr.Accordion("šŸ“‘ Select Member", open=True):
                    char_html = "".join(
                        f"\n{clean_title(title)}{name}\n"
                        for name, title, *_ in models
                    )
                    gr.HTML(f"\n{char_html}\n")
                with gr.Tabs():
                    for model_idx, model in enumerate(models):
                        _build_model_tab(
                            f"model_{cat_idx}_{model_idx}", model, default_f0_method
                        )

        gr.HTML('')
        app.load(None, None, None, js=_SELECT_MODEL_JS)

    # FIX: use share=True and no server_name/port parameters.
    app.queue(max_size=3).launch(share=True)


if __name__ == '__main__':
    main()