Spaces:
Running
Running
| import argparse | |
| import gradio as gr | |
| import json | |
| import os | |
| import sys | |
| import hashlib | |
| import time | |
| import math | |
| import re | |
| import zipfile | |
| from typing import Any, List, Dict, Optional, Tuple | |
| from pathlib import Path | |
| import numpy as np | |
| from pydub import AudioSegment | |
| from diffsinger_utau.voice_bank import PredAll | |
| from diffsinger_utau.voice_bank.commons.ds_reader import DSReader | |
| from diffsinger_utau.voice_bank.commons.phome_num_counter import Phome | |
| from pypinyin import pinyin, Style | |
| from pypinyin.constants import RE_HANS | |
| # —— 文本预处理:相邻纯汉字不加空格,其余保留空格 —— | |
def _is_hans_token(s: str) -> bool:
    """Tell whether ``s`` is matched in full by pypinyin's ``RE_HANS`` pattern.

    Any exception raised while matching (for example a non-string argument)
    is treated as "no match" and yields ``False``.
    """
    try:
        if not RE_HANS:
            return False
        return RE_HANS.fullmatch(s) is not None
    except Exception:
        return False
def preprocess_zh_spaces(text: str) -> str:
    """Normalise spacing in a lyric line.

    The text is split on single spaces and empty pieces are dropped. Two
    neighbouring tokens that are both pure-Hanzi (per ``_is_hans_token``) are
    glued together without a space; every other pair of neighbours is joined
    by exactly one space. ``None`` or blank input yields ``""``.
    """
    tokens = [tok for tok in (text or "").split(" ") if tok]
    if not tokens:
        return ""

    pieces = [tokens[0]]
    previous = tokens[0]
    for current in tokens[1:]:
        if _is_hans_token(previous) and _is_hans_token(current):
            # Both sides are Hanzi: concatenate directly.
            pieces.append(current)
        else:
            pieces.append(" " + current)
        previous = current
    return "".join(pieces)
| def validate_lyric_format(modified_text: str, original_text: str) -> Tuple[bool, str]: | |
| """ | |
| 校验歌词格式是否与原始文本匹配 | |
| 返回: (是否匹配, 渲染后的原始文本或空字符串) | |
| """ | |
| if not original_text: | |
| return True, "" | |
| # 去掉空格后比较 | |
| modified_clean = re.sub(r'\s+', '', modified_text) | |
| original_clean = re.sub(r'\s+', '', original_text) | |
| # 长度检查 | |
| if len(modified_clean) != len(original_clean): | |
| return False, render_original_with_highlights(original_text, modified_text) | |
| # AP/SP 位置检查 | |
| modified_ap_sp_positions = [] | |
| original_ap_sp_positions = [] | |
| # 找到修改后文本中的 AP/SP 位置 | |
| for match in re.finditer(r'\b(AP|SP)\b', modified_text): | |
| modified_ap_sp_positions.append((match.start(), match.group())) | |
| # 找到原始文本中的 AP/SP 位置 | |
| for match in re.finditer(r'\b(AP|SP)\b', original_text): | |
| original_ap_sp_positions.append((match.start(), match.group())) | |
| # 比较 AP/SP 的数量和类型 | |
| if len(modified_ap_sp_positions) != len(original_ap_sp_positions): | |
| return False, render_original_with_highlights(original_text, modified_text) | |
| # 检查每个 AP/SP 的相对位置是否一致 | |
| for (mod_pos, mod_type), (orig_pos, orig_type) in zip(modified_ap_sp_positions, original_ap_sp_positions): | |
| if mod_type != orig_type: | |
| return False, render_original_with_highlights(original_text, modified_text) | |
| # 计算相对位置(在去空格后的字符串中) | |
| mod_relative_pos = len(re.sub(r'\s+', '', modified_text[:mod_pos])) | |
| orig_relative_pos = len(re.sub(r'\s+', '', original_text[:orig_pos])) | |
| if mod_relative_pos != orig_relative_pos: | |
| return False, render_original_with_highlights(original_text, modified_text) | |
| return True, "" | |
def render_original_with_highlights(original_text: str, modified_text: str) -> str:
    """Render the original lyric line as grey HTML, flagging misplaced markers.

    Every ``AP`` / ``SP`` marker of ``original_text`` that does not occur in
    ``modified_text`` at the same whitespace-free character position is
    wrapped in a red ``<span>``; everything else is emitted as-is inside one
    grey ``<span>``.

    Markers are located with the same ``\\b(AP|SP)\\b`` pattern that
    ``validate_lyric_format`` uses, so both functions always agree on what
    counts as a marker. Non-marker text is HTML-escaped (``&``, ``<``, ``>``)
    because it originates from template files and is displayed as markup.

    Args:
        original_text: The template's lyric line.
        modified_text: The user's edited lyric line.

    Returns:
        An HTML fragment string.
    """
    import html  # local import: not part of the file's top-level imports

    marker_re = re.compile(r'\b(AP|SP)\b')

    def _clean_len(text):
        # Number of non-whitespace characters in ``text``.
        return len(re.sub(r'\s+', '', text))

    modified_markers = {
        (_clean_len(modified_text[:hit.start()]), hit.group())
        for hit in marker_re.finditer(modified_text)
    }

    parts = []
    cursor = 0
    for hit in marker_re.finditer(original_text):
        # Plain text between the previous marker and this one.
        parts.append(html.escape(original_text[cursor:hit.start()], quote=False))
        marker = hit.group()
        if (_clean_len(original_text[:hit.start()]), marker) in modified_markers:
            parts.append(marker)
        else:
            parts.append(f'<span style="color: red;">{marker}</span>')
        cursor = hit.end()
    parts.append(html.escape(original_text[cursor:], quote=False))
    return f'<span style="color: gray;">{"".join(parts)}</span>'
# Try to import diffsinger-utau (this project is required to use that library
# instead of re-implementing it); the name falls back to None when the import fails.
try:
    import diffsinger_utau  # type: ignore
except Exception:
    diffsinger_utau = None

# Directory layout, all relative to the folder containing this file.
ROOT = Path(__file__).parent.resolve()
MODELS_DIR = ROOT / "models"
PUBLIC_TEMPLATES_DIR = ROOT / "templates" / "public"
USER_TEMPLATES_DIR = ROOT / "templates" / "user"
OUTPUT_DIR = ROOT / "output" / "pred_all"
CACHE_DIR = ROOT / "cache"
# Audio extensions probed, in this order, when looking for a template's BGM file.
AUDIO_EXTS = [".wav", ".mp3", ".flac", ".m4a", ".ogg"]
# Maximum number of editable lyric lines; the UI pre-creates this many
# textboxes so no component has to be created inside an event handler.
MAX_LINES = 200
def ensure_dirs():
    """Create the model, template, output and cache directories if missing."""
    required = (MODELS_DIR, PUBLIC_TEMPLATES_DIR, USER_TEMPLATES_DIR, OUTPUT_DIR, CACHE_DIR)
    for directory in required:
        directory.mkdir(parents=True, exist_ok=True)
def list_model_choices() -> List[str]:
    """List the selectable models as paths relative to ``ROOT``.

    Every directory and every regular file directly inside ``MODELS_DIR``
    counts as a choice (a model folder or a single-file weight), in sorted
    order. Returns an empty list when ``MODELS_DIR`` does not exist.
    """
    if not MODELS_DIR.exists():
        return []
    return [
        str(entry.relative_to(ROOT))
        for entry in sorted(MODELS_DIR.iterdir())
        if entry.is_dir() or entry.is_file()
    ]
def find_templates() -> Dict[str, Path]:
    """Map template names (file stem, no extension) to their ``.ds`` paths.

    The public template directory is scanned first and the user directory
    second, so a user template replaces a public one of the same name.
    """
    found: Dict[str, Path] = {}
    # Order matters: later folders override earlier ones.
    for folder in (PUBLIC_TEMPLATES_DIR, USER_TEMPLATES_DIR):
        if not folder.exists():
            continue
        for ds_file in folder.glob("*.ds"):
            found[ds_file.stem] = ds_file
    return found
def bgm_path_for(template_path: Path) -> Optional[Path]:
    """Find the background-music file that sits next to a template.

    The template's extension is replaced by each entry of ``AUDIO_EXTS`` in
    turn; the first candidate that exists on disk is returned, or ``None``
    when there is none.
    """
    stem_path = str(template_path.with_suffix(""))
    candidates = (Path(stem_path + suffix) for suffix in AUDIO_EXTS)
    return next((candidate for candidate in candidates if candidate.exists()), None)
def load_ds(template_path: Path):
    """Read the ``.ds`` template at ``template_path`` with ``DSReader`` and return the result."""
    return DSReader(template_path).read_ds()
def audiosegment_from_file(path: Path) -> AudioSegment:
    """Load the audio file at ``path`` into a pydub ``AudioSegment``."""
    location = str(path)
    return AudioSegment.from_file(location)
def export_wav(seg: AudioSegment, path: Path):
    """Write ``seg`` to ``path`` as a WAV file, creating parent folders as needed."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    seg.export(str(path), format="wav")
def overlay_bgm_snippet(vocal_wav: Path, bgm_audio: AudioSegment, offset_sec: float, bgm_volume: float = 0.3, vocal_gain_db: float = 0.0) -> AudioSegment:
    """Mix one rendered vocal line over the matching stretch of the BGM.

    Args:
        vocal_wav: Path of the rendered vocal clip.
        bgm_audio: The full background track.
        offset_sec: Start of the line on the song timeline, in seconds
            (negative values are clamped to 0).
        bgm_volume: Linear volume multiplier for the BGM; ``<= 0`` replaces
            the BGM with silence.
        vocal_gain_db: Gain in dB added to the vocal when non-zero.

    Returns:
        The mixed audio cut to the window occupied by the vocal clip.
    """
    voice = audiosegment_from_file(vocal_wav)
    if vocal_gain_db != 0.0:
        voice = voice + vocal_gain_db

    begin_ms = max(int(offset_sec * 1000), 0)
    end_ms = begin_ms + len(voice)

    # Apply the BGM volume multiplier (converted to decibels).
    if bgm_volume <= 0.0:
        backing = AudioSegment.silent(duration=end_ms)
    else:
        backing = bgm_audio + 20.0 * math.log10(bgm_volume)

    # Make sure the backing track reaches the end of the vocal.
    shortfall_ms = end_ms - len(backing)
    if shortfall_ms > 0:
        backing = backing + AudioSegment.silent(duration=shortfall_ms)

    blended = backing.overlay(voice, position=begin_ms)
    return blended[begin_ms:end_ms]  # pyright: ignore[reportReturnType]
def concat_with_offsets(clips: List[Tuple[AudioSegment, float]]) -> AudioSegment:
    """Lay clips onto one silent timeline at their offsets.

    Each item is ``(segment, offset_seconds)``; negative offsets are treated
    as 0. The result is long enough to contain the latest-ending clip. An
    empty input yields a zero-length silent segment.
    """
    if not clips:
        return AudioSegment.silent(duration=0)

    # Resolve every start position (ms) once.
    placed = [(int(max(offset, 0) * 1000), seg) for seg, offset in clips]
    total_ms = max([0] + [start + len(seg) for start, seg in placed])

    canvas = AudioSegment.silent(duration=total_ms)
    for start, seg in placed:
        canvas = canvas.overlay(seg, position=start)
    return canvas
def mix_full_song(vocal: AudioSegment, bgm: AudioSegment, bgm_volume: float = 0.3) -> AudioSegment:
    """Overlay the full vocal track on the BGM.

    The shorter of the two tracks is padded with trailing silence so both
    have the same length. ``bgm_volume`` is a linear multiplier converted to
    decibels; a value ``<= 0`` replaces the BGM with silence.
    """
    gap_ms = len(vocal) - len(bgm)
    if gap_ms > 0:
        bgm = bgm + AudioSegment.silent(duration=gap_ms)
    else:
        vocal = vocal + AudioSegment.silent(duration=-gap_ms)

    # Apply the BGM volume multiplier.
    if bgm_volume <= 0.0:
        backing = AudioSegment.silent(duration=len(bgm))
    else:
        backing = bgm + 20.0 * math.log10(bgm_volume)
    return backing.overlay(vocal)
def generate_ai_prompt(lyrics_text: str) -> str:
    """Build the prompt a user pastes into an LLM to get replacement lyrics.

    The template lyrics are space-normalised with ``preprocess_zh_spaces``
    and embedded in a fixed Chinese instruction text; empty or ``None``
    lyrics leave the embedded block blank.
    """
    template_lyrics = ""
    if lyrics_text:
        template_lyrics = preprocess_zh_spaces(lyrics_text)
    return f"""这是原始歌词:
```txt
{template_lyrics}
```
其中SP和AP分别代表停顿和呼吸。你应该保留原始格式,然后按照要求替换歌词。
保留原始格式的意思是每句歌词字数应该保持不变。
比如 "AP 试着 SP 掬一把星辰 SP 在手心 SP" 修改为 "AP 天空 SP 赤色的晚霞 SP 刚散去 SP" 就是符合要求的。如果有多字、少字或者AP, SP位置不对,都是不符合要求的。
现在请帮我基于上述原始歌词模板,写一首歌曲《历史的进程推着人前进》,主题为:个人奋斗固然重要,但是历史进程更加浩浩汤汤。"""
def apply_ai_lyrics(ai_lyrics: str, original_lyrics: str) -> Tuple[str, str]:
    """Validate pasted AI lyrics against the current lyrics and apply them.

    Both texts are split on ``"\\n"`` (blank lines are kept) and must have the
    same number of lines.

    Returns:
        ``(lyrics, message)``: the new lyrics (each line stripped) plus a
        success message, or the unchanged ``original_lyrics`` plus an error
        message when the input is blank or the line counts differ.
    """
    if not (ai_lyrics and ai_lyrics.strip()):
        return original_lyrics, "请先输入回填歌词"

    candidate_lines = list(map(str.strip, ai_lyrics.split('\n')))
    expected_count = len(original_lyrics.split('\n'))

    # The line counts must agree.
    if len(candidate_lines) != expected_count:
        return original_lyrics, f"行数不匹配:AI歌词有{len(candidate_lines)}行,原始歌词有{expected_count}行"

    return '\n'.join(candidate_lines), "歌词应用成功!"
def param_hash(model_sel: str, speaker: str, key_shift: int, steps: int, text: str) -> str:
    """Derive a 16-hex-character cache key from the synthesis parameters.

    The parameters are serialised as compact JSON with sorted keys (a falsy
    ``speaker`` / ``text`` becomes ``""``; ``key_shift`` / ``steps`` are
    coerced to ``int``) and the key is the first 16 hex digits of the MD5
    digest of that JSON encoded as UTF-8.
    """
    payload = {
        "model": model_sel,
        "speaker": speaker if speaker else "",
        "key_shift": int(key_shift),
        "steps": int(steps),
        "text": text if text else "",
    }
    serialized = json.dumps(payload, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
    digest = hashlib.md5(serialized.encode("utf-8")).hexdigest()
    return digest[:16]
class DSUEngine:
    """Inference engine built on ``voice_bank.PredAll``.

    Predictor objects are cached per resolved model path, so each model is
    constructed at most once per engine instance.
    """

    def __init__(self):
        # True when both library entry points resolved to real objects.
        self.available = PredAll is not None and DSReader is not None
        self.predictors: Dict[str, PredAll] = {}  # resolved model path -> PredAll instance

    def is_ready(self) -> bool:
        """Return whether the underlying library is usable."""
        return self.available

    def _get_predictor(self, model_path: Path):
        """Return the cached ``PredAll`` for ``model_path``, creating it on first use."""
        key = str(model_path.resolve())
        if key not in self.predictors:
            self.predictors[key] = PredAll(Path(key))
        return self.predictors[key]

    def synth_line(
        self,
        model_path: Path,
        template_path: Path,
        line_index: int,
        text: str,
        speaker: Optional[str],
        key_shift: int,
        steps: int,
        out_wav: Path,
    ) -> None:
        """Synthesise one template line with replaced lyrics into ``out_wav``.

        Args:
            model_path: Model to load (cached after the first call).
            template_path: ``.ds`` template the line is taken from.
            line_index: Zero-based index of the line inside the template.
            text: Lyrics that replace the line's text (via the line's ``replace``).
            speaker: Speaker name; when falsy, the predictor's first
                ``available_speakers`` entry is used if there is one.
            key_shift: Forwarded to the pipeline as ``key_shift``.
            steps: Forwarded to the pipeline as ``acoustic_steps``.
            out_wav: Destination file; its parent directory is created.

        Raises:
            RuntimeError: If the engine is unavailable, the pipeline reports
                no ``audio_path``, or the output file is missing/empty.
            IndexError: If ``line_index`` is outside the template.
        """
        if not self.available:
            # Previously a silent no-op, which let callers hand out a path to
            # a file that was never written.
            raise RuntimeError("推理引擎不可用,无法合成音频")

        predictor = self._get_predictor(model_path)

        # Read the template and replace the target line's text (and whatever
        # derived data the line's ``replace`` method recomputes).
        ds_list = DSReader(template_path).read_ds()
        if not (0 <= line_index < len(ds_list)):
            raise IndexError("行索引越界")
        ds = ds_list[line_index]
        ds.replace(text)

        # Pick a speaker; falling back to the first available one is best-effort.
        spk = speaker
        try:
            if (not spk) and getattr(predictor, "available_speakers", None):
                av = predictor.available_speakers
                if isinstance(av, (list, tuple)) and len(av) > 0:
                    spk = av[0]
        except Exception:
            pass

        out_wav.parent.mkdir(parents=True, exist_ok=True)
        results = predictor.predict_full_pipeline(
            ds=ds,
            lang="zh",
            speaker=spk,
            key_shift=int(key_shift),
            pitch_steps=10,
            variance_steps=10,
            acoustic_steps=int(steps),
            gender=0.0,
            output_dir=str(out_wav.parent),
            save_intermediate=False,
        )

        # Move/rename the pipeline's output to the requested file name.
        audio_path = results.get("audio_path") if isinstance(results, dict) else None
        if not audio_path:
            raise RuntimeError("predict_full_pipeline 未返回 audio_path")
        src = Path(audio_path)
        if src.resolve() != out_wav.resolve():
            if src.exists():
                src.replace(out_wav)
        if not out_wav.exists() or out_wav.stat().st_size == 0:
            raise RuntimeError("未能生成音频文件")
| # Module-level engine instance used by the rendering functions in this file. | |
| engine = DSUEngine() | |
def get_template_choices_and_bgm_visible():
    """Return the sorted list of available template names.

    Despite the name, only the names are returned; whether the BGM control is
    visible is decided elsewhere from the currently selected template.
    """
    return sorted(find_templates().keys())
def on_select_template(template_name: str):
    """Load the data the UI needs for a selected template.

    Returns:
        ``(bgm_update, lines, offsets)``: a ``gr.update`` for the BGM volume
        control (visible with value 0.3 when a BGM file exists, otherwise
        hidden with value 0.0), the space-normalised ``text`` of each template
        item, and each item's ``offset`` as float (0.0 when absent). An
        unknown or empty template name yields a hidden control and two empty
        lists.
    """
    templates = find_templates()
    if not (template_name and template_name in templates):
        return gr.update(visible=False, value=0.0), [], []

    template_path = templates[template_name]
    bgm = bgm_path_for(template_path)
    ds = load_ds(template_path)

    lines = [preprocess_zh_spaces(raw) for raw in (item.get("text", "") for item in ds)]
    offsets = list(map(float, (item.get("offset", 0.0) for item in ds)))

    has_bgm = bgm is not None
    return gr.update(visible=has_bgm, value=(0.3 if has_bgm else 0.0)), lines, offsets
def render_single_line(
    model_sel: str,
    template_name: str,
    line_index: int,
    new_text: str,
    speaker: str,
    key_shift: int,
    steps: int,
    bgm_volume: float,
) -> Tuple[Optional[str], Optional[str]]:
    """Render (or reuse from cache) a single lyric line.

    The rendered vocal is cached under
    ``OUTPUT_DIR/<template>/<param_hash>/line_<n>.wav``. When ``bgm_volume``
    is positive and the template has a BGM file, a BGM-mixed preview is
    exported next to it and returned instead.

    Returns:
        ``(audio_path, None)`` on success or ``(None, error_message)`` on any
        failure; exceptions are converted into the error message.
    """
    try:
        if not model_sel:
            return None, "请先选择模型"
        templates = find_templates()
        if template_name not in templates:
            return None, "未找到模板"
        template_path = templates[template_name]
        ds = load_ds(template_path)
        if line_index < 0 or line_index >= len(ds):
            return None, "行索引越界"

        # The edited text is used for this render only; nothing is written back.
        text = new_text.strip()
        if not text:
            return None, "文本为空"

        # Cache location keyed by the synthesis parameters.
        digest = param_hash(model_sel, speaker, key_shift, steps, text)
        cache_dir = OUTPUT_DIR / template_name / digest
        cache_dir.mkdir(parents=True, exist_ok=True)
        wav_out = cache_dir / f"line_{line_index+1}.wav"

        if not wav_out.exists():
            # Cache miss: run inference.
            engine.synth_line(
                model_path=ROOT / model_sel,
                template_path=template_path,
                line_index=line_index,
                text=text,
                speaker=speaker or None,
                key_shift=int(key_shift),
                steps=int(steps),
                out_wav=wav_out,
            )

        # Without a positive BGM volume and an existing BGM file, return the dry vocal.
        bgm_file = bgm_path_for(template_path)
        wants_mix = bool(bgm_volume and bgm_volume > 0.0)
        if not (wants_mix and bgm_file and bgm_file.exists()):
            return str(wav_out), None

        bgm_audio = audiosegment_from_file(bgm_file)
        line_offset = float(ds[line_index].get("offset", 0.0))
        preview = overlay_bgm_snippet(wav_out, bgm_audio, line_offset, bgm_volume)
        preview_wav = cache_dir / f"line_{line_index+1}_preview.wav"
        export_wav(preview, preview_wav)
        return str(preview_wav), None
    except Exception as e:
        return None, f"{type(e).__name__}: {e}"
def generate_full_song(
    model_sel: str,
    template_name: str,
    lines: List[str],
    speaker: str,
    key_shift: int,
    steps: int,
    bgm_volume: float,
) -> Tuple[Optional[str], Optional[str]]:
    """Render every non-blank line and assemble the whole song.

    Lines are rendered (or taken from the per-line cache), placed on a
    timeline at their template ``offset`` and exported as a vocal-only WAV.
    When ``bgm_volume`` is positive and a BGM file exists, a mixed WAV is
    exported as well.

    Returns:
        ``(vocal_path, mixed_path_or_None)`` on success, or
        ``(None, error_message)`` on failure.
    """
    try:
        if not model_sel:
            return None, "请先选择模型"
        templates = find_templates()
        if template_name not in templates:
            return None, "未找到模板"
        template_path = templates[template_name]
        ds = load_ds(template_path)
        if len(ds) != len(lines):
            return None, "模板行数与编辑行数不一致"

        # Render each line, reusing cached audio where possible.
        model_path = ROOT / model_sel
        placed: List[Tuple[AudioSegment, float]] = []
        for idx, item in enumerate(ds):
            text = (lines[idx] or "").strip()
            if not text:
                continue
            digest = param_hash(model_sel, speaker, key_shift, steps, text)
            cache_dir = OUTPUT_DIR / template_name / digest
            cache_dir.mkdir(parents=True, exist_ok=True)
            wav_out = cache_dir / f"line_{idx+1}.wav"
            if not wav_out.exists():
                engine.synth_line(
                    model_path=model_path,
                    template_path=template_path,
                    line_index=idx,
                    text=text,
                    speaker=speaker or None,
                    key_shift=int(key_shift),
                    steps=int(steps),
                    out_wav=wav_out,
                )
            line_offset = float(item.get("offset", 0.0))
            placed.append((audiosegment_from_file(wav_out), line_offset))

        # Assemble the full vocal track on the timeline.
        vocal_full = concat_with_offsets(placed)
        final_dir = OUTPUT_DIR / template_name / "final"
        final_dir.mkdir(parents=True, exist_ok=True)
        ts_tag = time.strftime("%Y%m%d_%H%M%S")
        vocal_path = final_dir / f"{template_name}_vocal_{ts_tag}.wav"
        export_wav(vocal_full, vocal_path)

        # Mixed version only when a BGM exists and its volume is positive.
        bgm_file = bgm_path_for(template_path)
        wants_mix = bool(bgm_volume and bgm_volume > 0.0)
        if not (wants_mix and bgm_file and bgm_file.exists()):
            return str(vocal_path), None

        mixed = mix_full_song(vocal_full, audiosegment_from_file(bgm_file), bgm_volume)
        mixed_path = final_dir / f"{template_name}_mixed_{ts_tag}.wav"
        export_wav(mixed, mixed_path)
        return str(vocal_path), str(mixed_path)
    except Exception as e:
        return None, f"{type(e).__name__}: {e}"
def save_uploaded_template(file_obj) -> str:
    """Store an uploaded ``.ds`` template under ``templates/user``.

    The file (located via ``file_obj.name``) must have a ``.ds`` extension and
    contain JSON whose top level is a list; it is re-serialised with
    indentation into the user template folder under its original file name.

    Returns:
        The template name (file stem) on success, or a string of the form
        ``"ERROR::<ExceptionType>: <message>"`` on failure.
    """
    try:
        if file_obj is None:
            raise ValueError("未选择文件")
        src = Path(file_obj.name)
        if src.suffix.lower() != ".ds":
            raise ValueError("仅支持 .ds 模板文件")

        USER_TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
        destination = USER_TEMPLATES_DIR / src.name

        # Parse and run a minimal structural check before storing.
        payload = json.loads(src.read_text(encoding="utf-8"))
        if not isinstance(payload, list):
            raise ValueError("ds 文件格式错误:顶层必须是 list")

        destination.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
        return src.stem
    except Exception as e:
        return f"ERROR::{type(e).__name__}: {e}"
| def build_ui(): | |
| ensure_dirs() | |
| model_choices = list_model_choices() | |
| template_names = get_template_choices_and_bgm_visible() | |
| css = """ | |
| /* 全局:启用页面整体滚动,移除左右分栏独立滚动 */ | |
| #main-row { gap: 12px; } | |
| #left-panel, #right-panel { | |
| padding: 12px; | |
| } | |
| #left-panel { border-right: 1px solid #eee; } | |
| /* 响应式:窄屏下上下布局,宽屏左右布局 */ | |
| @media (max-width: 900px) { | |
| #main-row { flex-direction: column !important; } | |
| #left-panel { border-right: none; border-bottom: 1px solid #eee; } | |
| } | |
| /* 紧凑按钮样式 */ | |
| .compact-btn button { padding: 4px 10px !important; min-height: 30px !important; height: 30px !important; } | |
| .compact-row { gap: 8px !important; } | |
| """ | |
| with gr.Blocks(title="DiffSinger WebUI", theme=gr.themes.Soft(), css=css, head='<meta name="description" content="项目地址 https://github.com/bingcheng1998/diffsinger-webui">') as demo: | |
| with gr.Row(elem_id="main-row"): | |
| # 左栏:控制/预览(固定) | |
| with gr.Column(elem_id="left-panel", scale=1, min_width=360): | |
| # 左栏标题与模型/模板选择、上传/下载 | |
| gr.Markdown("## DiffSinger WebUI") | |
| gr.Markdown("项目地址: [https://github.com/bingcheng1998/diffsinger-webui](https://github.com/bingcheng1998/diffsinger-webui)") | |
| model_sel = gr.Dropdown(choices=model_choices, label="模型选择", value=(model_choices[0] if model_choices else None)) | |
| template_sel = gr.Dropdown(choices=template_names, label="模板选择", value=(template_names[0] if template_names else None)) | |
| with gr.Row(elem_classes=["compact-row"]): | |
| upload = gr.UploadButton("上传ds模板", file_types=[".ds"], elem_classes=["compact-btn"]) | |
| download_btn = gr.DownloadButton(label="下载ds&lrc", elem_classes=["compact-btn"]) | |
| with gr.Row(): | |
| bgm_volume = gr.Slider(0.0, 2.0, value=0.3, step=0.01, label="BGM音量", visible=False) | |
| key_shift = gr.Slider(-12, 12, value=0, step=1, label="音高偏移") | |
| steps = gr.Slider(1, 50, value=4, step=1, label="渲染步数") | |
| speaker = gr.Dropdown(label="演唱者", choices=[], value=None, interactive=True) | |
| # 单句预览与错误提示 | |
| per_line_audio = gr.Audio(label="单句预览", autoplay=True, interactive=False) | |
| per_line_error = gr.Markdown("", visible=False) | |
| # 生成控制与输出 | |
| gen_btn = gr.Button("生成整首") | |
| progress_md = gr.Markdown("", visible=True) | |
| full_vocal = gr.Audio(label="整首(人声)", autoplay=False) | |
| full_mixed = gr.Audio(label="整首(混音)", autoplay=False, visible=False) | |
| # AI歌词功能 | |
| with gr.Accordion("AI歌词", open=False): | |
| gr.Markdown("点击按钮生成prompt并复制,然后到大模型APP粘贴,将结果回填到下方文本框") | |
| # 使用Gradio 4.44.0的内置复制功能 | |
| ai_prompt_display = gr.Textbox( | |
| label="AI歌词Prompt", | |
| lines=8, | |
| max_lines=12, | |
| interactive=False, | |
| placeholder="点击下方按钮生成prompt..." | |
| ) | |
| with gr.Row(): | |
| generate_prompt_btn = gr.Button("生成Prompt", variant="primary") | |
| copy_prompt_btn = gr.Button("📋 复制", variant="secondary") | |
| ai_lyrics_input = gr.Textbox( | |
| label="回填歌词", | |
| lines=10, | |
| max_lines=15, | |
| placeholder="请将大模型生成的歌词粘贴到这里..." | |
| ) | |
| apply_lyrics_btn = gr.Button("应用歌词", variant="primary") | |
| # 右栏:模板与歌词编辑 | |
| with gr.Column(elem_id="right-panel", scale=2): | |
| # 状态与歌词编辑容器(右栏仅歌词编辑) | |
| lines_state = gr.State([]) | |
| offsets_state = gr.State([]) | |
| # 生成控制状态 | |
| stop_flag = gr.State(False) | |
| generating_flag = gr.State(False) | |
| dyn = gr.Column() | |
| # 预创建文本框和错误提示,依据当前模板设置初始可见性和值 | |
| textboxes = [] | |
| error_markdowns = [] | |
| original_lines_state = gr.State([]) # 存储原始歌词用于校验 | |
| init_lines = [] | |
| init_offsets = [] | |
| if template_sel.value: | |
| try: | |
| _, init_lines, init_offsets = on_select_template(template_sel.value) | |
| except Exception: | |
| init_lines, init_offsets = [], [] | |
| with dyn: | |
| for i in range(MAX_LINES): | |
| visible = i < len(init_lines) | |
| val = init_lines[i] if visible else "" | |
| tb = gr.Textbox(value=val, label=f"第 {i+1} 句", lines=1, max_lines=1, visible=visible) | |
| textboxes.append(tb) | |
| # 为每个文本框添加对应的错误提示 | |
| error_md = gr.Markdown("", visible=False) | |
| error_markdowns.append(error_md) | |
| if template_sel.value: | |
| lines_state.value = init_lines | |
| offsets_state.value = init_offsets | |
| original_lines_state.value = init_lines.copy() | |
| # 事件:选择模板时,更新 BGM 开关、整首混音可见性与文本框内容 | |
def on_template_change(template_name):
    """Refresh every template-dependent component after a template switch.

    Returns a tuple in the order of the event's outputs: BGM slider update,
    template dropdown update (choices re-scanned), lines state, offsets
    state, cleared per-line error, mixed-audio visibility (whether a BGM
    exists), a copy of the lines as the "original" state, then one update per
    pre-created textbox followed by one per error markdown.
    """
    bgm_update, lines, offsets = on_select_template(template_name)
    has_bgm = isinstance(bgm_update, dict) and bool(bgm_update.get("visible", False))

    # Show the first len(lines) textboxes with their text; hide and blank the rest.
    line_count = len(lines)
    tb_updates = [
        gr.update(value=lines[i], visible=True) if i < line_count else gr.update(value="", visible=False)
        for i in range(len(textboxes))
    ]
    # Every validation message is cleared on a template switch.
    error_updates = [gr.update(value="", visible=False) for _ in textboxes]

    return (
        bgm_update,
        gr.update(choices=get_template_choices_and_bgm_visible(), value=template_name),
        lines,
        offsets,
        gr.update(value="", visible=False),
        gr.update(visible=has_bgm),
        lines.copy(),
        *tb_updates,
        *error_updates,
    )
| # 模型切换:动态更新 speaker 下拉项 | |
def on_model_change(model_path_rel):
    """Refresh the speaker dropdown for the newly selected model.

    The predictor's ``available_speakers`` (when it is a list or tuple)
    becomes the choice list with the first entry selected. With no model, no
    speakers, or any error, the dropdown is emptied and made non-interactive.
    """
    def _no_speakers():
        return gr.update(choices=[], value=None, interactive=False)

    try:
        if not model_path_rel:
            return _no_speakers()

        model_path = ROOT / model_path_rel
        speakers = []
        if getattr(engine, "available", False) and model_path.exists():
            predictor = engine._get_predictor(model_path)
            declared = getattr(predictor, "available_speakers", None)
            if isinstance(declared, (list, tuple)):
                speakers = list(declared)

        if not speakers:
            return _no_speakers()
        return gr.update(choices=speakers, value=speakers[0], interactive=True)
    except Exception:
        return _no_speakers()
| model_sel.change( | |
| fn=on_model_change, | |
| inputs=[model_sel], | |
| outputs=[speaker], | |
| ) | |
| # BGM 音量倍率变化:控制“整首(混音)”可见性(需当前模板存在 BGM 且倍率>0) | |
def on_bgm_volume_change(bgm_vol, template_name):
    """Show the mixed-song player only when mixing is actually possible.

    That requires a positive BGM volume and a BGM file for the current template.
    """
    templates = find_templates()
    has_bgm = template_name in templates and bgm_path_for(templates[template_name]) is not None
    return gr.update(visible=bool(bgm_vol and bgm_vol > 0 and has_bgm))
| bgm_volume.change( | |
| fn=on_bgm_volume_change, | |
| inputs=[bgm_volume, template_sel], | |
| outputs=[full_mixed], | |
| ) | |
| # 模板切换:批量更新预创建文本框 + 初始化整首混音可见性 | |
| template_sel.change( | |
| fn=on_template_change, | |
| inputs=[template_sel], | |
| outputs=[bgm_volume, template_sel, lines_state, offsets_state, per_line_error, full_mixed, original_lines_state, *textboxes, *error_markdowns], | |
| ) | |
| # 上传模板:将用户 .ds 保存到 templates/user,并刷新模板下拉 | |
def on_upload_ds(file_obj):
    """Copy an uploaded ``.ds`` file into the user template folder and select it.

    Args:
        file_obj: The upload value; either an object with a ``name``
            attribute holding the file path, or something whose ``str()`` is
            the path.

    Returns:
        A ``gr.update`` for the template dropdown with re-scanned choices and
        the uploaded template's stem as the selected value.

    Raises:
        gr.Error: On any failure, with the message prefixed by ``上传失败``.
    """
    import shutil  # local import: shutil is not among the file's top-level imports

    try:
        if not file_obj:
            raise gr.Error("未选择文件")
        # The upload button may hand over a file-like object or a plain path.
        src = Path(file_obj.name) if hasattr(file_obj, "name") else Path(str(file_obj))
        if src.suffix.lower() != ".ds":
            raise gr.Error("仅支持 .ds 文件")

        USER_TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(src, USER_TEMPLATES_DIR / src.name)

        # Re-scan templates and select the new one by its stem
        # (a user template overrides a public one of the same name).
        new_choices = get_template_choices_and_bgm_visible()
        return gr.update(choices=new_choices, value=src.stem)
    except Exception as e:
        # Keep the original failure attached as the cause for debugging.
        raise gr.Error(f"上传失败: {e}") from e
| upload.upload( | |
| fn=on_upload_ds, | |
| inputs=[upload], | |
| outputs=[template_sel], | |
| ) | |
| # 初始构建已通过预创建完成 | |
| # 文本提交事件:逐句渲染(为预创建文本框绑定) | |
| for idx, tb in enumerate(textboxes): | |
def make_submit(i):
    """Create the submit handler bound to the textbox of line index ``i``.

    The handler validates the edited text against the original line, renders
    the line, stores the new text in the lines state and returns updates for
    (preview audio, render error, per-line format error, lines state).
    """

    def _submit(new_text, lines_list, original_lines_list, model_sel_v, template_sel_v, speaker_v, key_shift_v, steps_v, bgm_volume_v):
        # Ignore submits from rows outside the currently loaded template.
        if not isinstance(lines_list, list) or i >= len(lines_list):
            return gr.update(), gr.update(), gr.update(), lines_list

        # Format check against the template's original line.
        original_text = original_lines_list[i] if i < len(original_lines_list) else ""
        is_valid, rendered_original = validate_lyric_format(new_text, original_text)
        if is_valid:
            error_update = gr.update(value="", visible=False)
        else:
            error_update = gr.update(value=f"字数与原始文本不符:{rendered_original}", visible=True)

        # Render regardless of the format check result.
        audio_path, err = render_single_line(model_sel_v, template_sel_v, i, new_text, speaker_v, key_shift_v, steps_v, bgm_volume_v)
        lines_list[i] = new_text

        if err:
            audio_update = gr.update(value=None)
            status_update = gr.update(value=f"❌ {err}", visible=True)
        else:
            audio_update = gr.update(value=audio_path)
            status_update = gr.update(value="", visible=False)
        return audio_update, status_update, error_update, lines_list

    return _submit
| tb.submit( | |
| fn=make_submit(idx), | |
| inputs=[tb, lines_state, original_lines_state, model_sel, template_sel, speaker, key_shift, steps, bgm_volume], | |
| outputs=[per_line_audio, per_line_error, error_markdowns[idx], lines_state], | |
| ) | |
| # 生成整首(支持进度与中断) | |
def on_gen_or_stop(model_sel_v, template_sel_v, speaker_v, key_shift_v, steps_v, bgm_volume_v, lines, stop, generating, progress=gr.Progress(track_tqdm=True)):
    """Generate the whole song, streaming progress to the UI (generator handler).

    Every yielded tuple follows the event's outputs:
    (vocal audio, mixed audio, button label, progress text, stop flag,
    generating flag).

    When called while ``generating`` is true, the click is treated as a stop
    request: one update is yielded and the handler ends.

    NOTE(review): ``stop`` is the value the state held when this call started;
    a generation that is already running never sees a later stop request, so
    the in-loop ``if stop`` check looks unable to fire. Real cancellation
    probably needs Gradio's ``cancels=`` or a shared flag — TODO confirm.
    """
    # A click during generation is a stop request: only update button and hint.
    if generating:
        stop = True
        # Must be ``yield``: a ``return <value>`` inside a generator is never
        # delivered to the outputs.
        yield gr.update(), gr.update(), gr.update(value="生成整首"), gr.update(value="已请求停止,稍候..."), stop, generating
        return
    # Start generating: flip the button, clear outputs, reset the stop flag.
    stop = False
    generating = True
    yield gr.update(value=None), gr.update(value=None), gr.update(value="停止生成整首"), gr.update(value="开始生成..."), stop, generating
    try:
        mapping = find_templates()
        if not model_sel_v:
            raise gr.Error("请先选择模型")
        if template_sel_v not in mapping:
            raise gr.Error("未找到模板")
        template_path = mapping[template_sel_v]
        ds = load_ds(template_path)
        if len(ds) != len(lines or []):
            raise gr.Error("模板行数与编辑行数不一致")
        model_path = ROOT / model_sel_v
        segs_with_offsets = []
        total = len(ds)
        for idx, item in enumerate(ds):
            if stop:
                yield gr.update(), gr.update(), gr.update(value="生成整首"), gr.update(value=f"已中断,完成 {idx}/{total} 行。"), stop, False
                return
            text = (lines[idx] or "").strip()
            if not text:
                # Blank line: report progress but do not synthesise.
                progress((idx + 1) / total, desc=f"跳过空白句 {idx+1}/{total}")
                yield gr.update(), gr.update(), gr.update(value="停止生成整首"), gr.update(value=f"跳过空白句 {idx+1}/{total}"), stop, True
                continue
            progress((idx + 1) / total, desc=f"渲染第 {idx+1}/{total} 句")
            yield gr.update(), gr.update(), gr.update(value="停止生成整首"), gr.update(value=f"渲染第 {idx+1}/{total} 句..."), stop, True
            h = param_hash(model_sel_v, speaker_v, key_shift_v, steps_v, text)
            cache_dir = OUTPUT_DIR / template_sel_v / h
            cache_dir.mkdir(parents=True, exist_ok=True)
            wav_out = cache_dir / f"line_{idx+1}.wav"
            if not wav_out.exists():
                engine.synth_line(
                    model_path=model_path,
                    template_path=template_path,
                    line_index=idx,
                    text=text,
                    speaker=speaker_v or None,
                    key_shift=int(key_shift_v),
                    steps=int(steps_v),
                    out_wav=wav_out,
                )
            segs_with_offsets.append((audiosegment_from_file(wav_out), float(item.get("offset", 0.0))))
        # Assemble and export the vocal track.
        vocal_full = concat_with_offsets(segs_with_offsets)
        final_dir = OUTPUT_DIR / template_sel_v / "final"
        final_dir.mkdir(parents=True, exist_ok=True)
        ts_tag = time.strftime("%Y%m%d_%H%M%S")
        vocal_path = final_dir / f"{template_sel_v}_vocal_{ts_tag}.wav"
        export_wav(vocal_full, vocal_path)
        # Mixed version only with a positive BGM volume and an existing BGM file.
        mixed_path = None
        bgm_p = bgm_path_for(template_path)
        if (bgm_volume_v and bgm_volume_v > 0.0) and bgm_p and bgm_p.exists():
            mixed = mix_full_song(vocal_full, audiosegment_from_file(bgm_p), bgm_volume_v)
            mixed_path = final_dir / f"{template_sel_v}_mixed_{ts_tag}.wav"
            export_wav(mixed, mixed_path)
        # Done.
        yield gr.update(value=str(vocal_path)), gr.update(value=(str(mixed_path) if mixed_path else None)), gr.update(value="生成整首"), gr.update(value="已完成"), False, False
    except Exception as e:
        yield gr.update(value=None), gr.update(value=None), gr.update(value="生成整首"), gr.update(value=f"❌ 失败:{type(e).__name__}: {e}"), False, False
| gen_btn.click( | |
| fn=on_gen_or_stop, | |
| inputs=[model_sel, template_sel, speaker, key_shift, steps, bgm_volume, lines_state, stop_flag, generating_flag], | |
| outputs=[full_vocal, full_mixed, gen_btn, progress_md, stop_flag, generating_flag], | |
| ) | |
| # 下载当前编辑后的 ds 和 lrc | |
def build_current_ds(template_sel_v, lines, offsets):
    """Export the current edits as a zip holding a DS, an LRC and a TXT file.

    The template is reloaded, each line's text is replaced with the edited
    text (through the item's ``replace`` method), and the three files plus
    the zip are written to ``OUTPUT_DIR/<template>/edits`` with a timestamped
    name.

    Returns:
        The path of the zip archive as a string.

    Raises:
        gr.Error: If no valid template is selected.
    """
    templates = find_templates()
    if not template_sel_v or template_sel_v not in templates:
        raise gr.Error("未选择有效模板")
    ds = load_ds(templates[template_sel_v])

    # Overwrite the text of every line that has an edited counterpart.
    edited = lines or []
    for i in range(min(len(ds), len(edited))):
        ds[i].replace(edited[i])

    # Everything goes to output/pred_all/<template>/edits.
    ts_tag = time.strftime("%Y%m%d_%H%M%S")
    out_dir = OUTPUT_DIR / template_sel_v / "edits"
    out_dir.mkdir(parents=True, exist_ok=True)
    base_name = f"{template_sel_v}_edited_{ts_tag}"

    # DS file.
    ds_path = out_dir / f"{base_name}.ds"
    ds_path.write_text(json.dumps(ds, ensure_ascii=False, indent=2), encoding="utf-8")

    # LRC file.
    lrc_path = out_dir / f"{base_name}.lrc"
    lrc_path.write_text(generate_lrc_content(ds, lines, offsets), encoding="utf-8")

    # Plain subtitle TXT file.
    txt_path = out_dir / f"{base_name}.txt"
    txt_path.write_text(generate_txt_content(ds, lines), encoding="utf-8")

    # Bundle the three files.
    zip_path = out_dir / f"{base_name}.zip"
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for member in (ds_path, lrc_path, txt_path):
            archive.write(member, member.name)
    return str(zip_path)
def generate_lrc_content(ds_data, lines, offsets):
    """Build the text of an LRC lyric file.

    Args:
        ds_data: Iterable of sentence mappings; each one's ``"text"`` entry
            is the fallback lyric for that sentence.
        lines: Edited lyric text per sentence. A missing or empty entry falls
            back to the sentence's own text. May be ``None``.
        offsets: Start time of each sentence in seconds. A missing or
            ``None`` entry is treated as 0. May be ``None``.

    Returns:
        The LRC content: four header tags, a blank line, then one
        ``[mm:ss.xx]lyric`` line for every sentence that still has text after
        ``clean_lyric_for_display``.
    """
    lrc_lines = [
        "[ar:DiffSinger]",
        "[ti:Generated Song]",
        "[al:DiffSinger WebUI]",
        "[by:DiffSinger WebUI]",
        "",
    ]
    edited = lines or []
    starts = offsets or []

    for i, sentence_data in enumerate(ds_data):
        # Prefer the edited line; fall back to the template's text.
        if i < len(edited) and edited[i]:
            lyric_text = edited[i]
        else:
            lyric_text = sentence_data.get("text", "")

        display_text = clean_lyric_for_display(lyric_text)
        if not display_text.strip():
            # Sentences with no visible lyric are left out of the LRC.
            continue

        start_time = 0.0
        if i < len(starts) and starts[i] is not None:
            start_time = float(starts[i])

        # Round to whole centiseconds BEFORE splitting into minutes and
        # seconds. Formatting the float seconds directly lets the rounding
        # carry be lost, e.g. 59.999 s would render as "[00:60.00]".
        # Negative offsets cannot be expressed in LRC, so clamp them to 0.
        total_cs = max(0, round(start_time * 100))
        minutes, rem_cs = divmod(total_cs, 6000)
        seconds, centis = divmod(rem_cs, 100)
        lrc_lines.append(f"[{minutes:02d}:{seconds:02d}.{centis:02d}]{display_text}")

    return "\n".join(lrc_lines)
def generate_txt_content(ds_data, lines):
    """Build the plain-text subtitle file: one cleaned lyric per line.

    Args:
        ds_data: Iterable of sentence mappings; each one's ``"text"`` entry
            is the fallback lyric for that sentence.
        lines: Edited lyric text per sentence. A missing or empty entry falls
            back to the sentence's own text. May be ``None``.

    Returns:
        The cleaned lyrics joined by newlines; sentences that are empty
        after ``clean_lyric_for_display`` are skipped.
    """
    edited = lines or []
    collected = []
    for idx, sentence in enumerate(ds_data):
        candidate = edited[idx] if idx < len(edited) else None
        # An empty edited entry counts as "not edited".
        raw_text = candidate if candidate else sentence.get("text", "")
        shown = clean_lyric_for_display(raw_text)
        if shown.strip():
            collected.append(shown)
    return "\n".join(collected)
def clean_lyric_for_display(lyric_text):
    """Return *lyric_text* with AP/SP markers and all whitespace removed.

    Only stand-alone ``AP`` / ``SP`` tokens (delimited by regex word
    boundaries) are removed; the same letters inside a longer word are kept.
    A falsy input (``None`` or ``""``) yields ``""``.
    """
    if lyric_text:
        without_markers = re.sub(r'\b(AP|SP)\b', '', lyric_text)
        return re.sub(r'\s+', '', without_markers)
    return ""
# Download button: build the export zip from the current template, lyric
# lines and offsets; the returned path is written back to the button itself.
download_btn.click(
    fn=build_current_ds,
    inputs=[
        template_sel,
        lines_state,
        offsets_state,
    ],
    outputs=[download_btn],
)
| # 上传模板:保存后通过更新模板下拉触发重建(复用模板切换事件) | |
def on_upload(file_obj, current_template):
    """Save an uploaded template and refresh the template dropdown.

    Args:
        file_obj: The uploaded file, passed straight to
            ``save_uploaded_template``.
        current_template: The template selected before the upload; it stays
            selected when saving fails.

    Returns:
        A pair of updates: (template dropdown, error message area).
    """
    saved = save_uploaded_template(file_obj)
    # NOTE(review): get_template_choices_and_bgm_visible() is defined outside
    # this block; confirm it returns just the list of choices.
    if not saved.startswith("ERROR::"):
        # Success: select the new template. Per the original comment this is
        # meant to trigger on_template_change and rebuild the textboxes.
        return (
            gr.update(choices=get_template_choices_and_bgm_visible(), value=saved),
            gr.update(value="", visible=False),
        )
    # Failure: keep the previous selection and show the message that follows
    # the 7-character "ERROR::" prefix.
    return (
        gr.update(choices=get_template_choices_and_bgm_visible(), value=current_template),
        gr.update(value=f"❌ {saved[7:]}", visible=True),
    )
# Template upload: on_upload saves the file, then updates the template
# dropdown and the error message area.
upload.upload(
    fn=on_upload,
    inputs=[
        upload,
        template_sel,
    ],
    outputs=[
        template_sel,
        per_line_error,
    ],
)
| # 进入页面时,自动刷新 speaker 与模板/BGM/歌词区 | |
def on_app_load(model_path_rel, template_name):
    """Refresh the speaker and the template-dependent widgets on page load.

    Returns:
        A flat tuple: the result of ``on_model_change`` followed by every
        item returned by ``on_template_change``.
    """
    # The model handler runs first, then the template handler, and the two
    # results are flattened into one tuple.
    return (
        on_model_change(model_path_rel),
        *on_template_change(template_name),
    )
| # AI歌词功能事件绑定 | |
def handle_generate_prompt(lines_list):
    """Build the AI prompt from the current lyric lines.

    The lines are joined with newlines (an empty or ``None`` list becomes an
    empty string) and passed to ``generate_ai_prompt``, whose result is
    returned unchanged.
    """
    if lines_list:
        lyrics_text = '\n'.join(lines_list)
    else:
        lyrics_text = ""
    return generate_ai_prompt(lyrics_text)
def handle_apply_lyrics(ai_lyrics, lines_list, original_lines_list):
    """Apply pasted AI lyrics to the per-line textboxes and validate them.

    Args:
        ai_lyrics: Raw pasted text, one lyric per text line.
        lines_list: Current lyric lines; the pasted text must contain exactly
            as many lines.
        original_lines_list: Original lyric lines; each new line is checked
            against its counterpart with ``validate_lyric_format``.

    Returns:
        A flat list: one update per textbox, then one per error markdown,
        then the new value of the lines state. When nothing is applied
        (empty input or line-count mismatch) the components are left
        untouched and ``lines_list`` is returned as the state.
    """
    def _no_change():
        # Leave every component untouched and keep the current lines state.
        return (
            [gr.update() for _ in textboxes]
            + [gr.update() for _ in error_markdowns]
            + [lines_list]
        )

    def _hidden_error():
        # A cleared, invisible error message.
        return gr.update(value="", visible=False)

    if not ai_lyrics or not ai_lyrics.strip():
        gr.Warning("请先输入回填歌词")
        return _no_change()

    # Blank lines are kept so that the line count stays comparable.
    # NOTE(review): a trailing newline in the pasted text yields one extra
    # empty line and therefore a mismatch below; confirm this is intended.
    ai_lines = [line.strip() for line in ai_lyrics.split('\n')]
    if len(ai_lines) != len(lines_list):
        gr.Warning(f"行数不匹配:AI歌词有{len(ai_lines)}行,原始歌词有{len(lines_list)}行")
        return _no_change()

    new_lines = ai_lines[:len(lines_list)]
    textbox_updates = []
    error_updates = []
    has_errors = False

    for idx in range(len(textboxes)):
        if idx >= len(new_lines):
            # Spare textbox without a lyric line: leave it, clear its error.
            textbox_updates.append(gr.update())
            error_updates.append(_hidden_error())
            continue

        textbox_updates.append(gr.update(value=new_lines[idx]))

        if idx >= len(original_lines_list):
            # Nothing to validate against.
            error_updates.append(_hidden_error())
            continue

        is_valid, rendered_original = validate_lyric_format(
            new_lines[idx], original_lines_list[idx]
        )
        if is_valid:
            error_updates.append(_hidden_error())
        else:
            error_msg = f"字数与原始文本不符:{rendered_original}"
            error_updates.append(gr.update(value=error_msg, visible=True))
            has_errors = True

    # Pad so that every error markdown receives an update.
    missing = len(error_markdowns) - len(error_updates)
    error_updates.extend(_hidden_error() for _ in range(missing))

    if has_errors:
        gr.Warning("歌词应用成功,但部分句子格式有误,请检查红色提示")
    else:
        gr.Info("歌词应用成功!所有句子格式正确")

    return textbox_updates + error_updates + [new_lines]
# "Generate prompt" button: build the AI prompt from the current lyric lines.
generate_prompt_btn.click(
    fn=handle_generate_prompt,
    inputs=[lines_state],
    outputs=[ai_prompt_display],
)

# "Copy prompt" button: the js snippet copies the prompt text to the
# clipboard in the browser; the Python function only echoes its input.
copy_prompt_btn.click(
    fn=lambda text: text,
    inputs=[ai_prompt_display],
    outputs=[],
    js="(text) => { navigator.clipboard.writeText(text); return text; }",
)

# "Apply lyrics" button: write the pasted lyrics into the textboxes, refresh
# the per-line error messages and update the lines state.
apply_lyrics_btn.click(
    fn=handle_apply_lyrics,
    inputs=[
        ai_lyrics_input,
        lines_state,
        original_lines_state,
    ],
    outputs=[
        *textboxes,
        *error_markdowns,
        lines_state,
    ],
)

# Page load: refresh the speaker and all template-dependent widgets.
demo.load(
    fn=on_app_load,
    inputs=[
        model_sel,
        template_sel,
    ],
    outputs=[
        speaker,
        bgm_volume,
        template_sel,
        lines_state,
        offsets_state,
        per_line_error,
        full_mixed,
        original_lines_state,
        *textboxes,
        *error_markdowns,
    ],
)
| return demo | |
def main():
    """Build the Gradio UI and launch it."""
    build_ui().launch()


# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()