Spaces:

invokerx
/

9year

Sleeping

App Files Community

invokerx committed on Dec 23, 2025

Commit

e5e481c

verified ·

1 Parent(s): 685ed67

Upload 3 files

Browse files

Files changed (3) hide show

models/app.py +158 -0
models/requirements.txt +12 -0
models/rvc_infer.py +140 -0

models/app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# app.py - 9th Anniversary Celebration App
+import gradio as gr
+import spaces
+import os
+import tempfile
+import shutil
+from pathlib import Path
+from datetime import datetime
+from utils import (
+    separate_vocals_and_instrumental,
+    merge_vocals_and_instrumental,
+    optimize_audio,
+)
+from rvc_infer import rvc_convert
+# Directory containing this file; every asset path below is resolved against it.
+PROJECT_ROOT = Path(__file__).parent
+# One entry per anniversary year, rendered as an accordion in the song tab.
+#   "year":     number shown in the accordion title
+#   "file":     relative path of the "cloned" rendition
+#   "original": relative path of the original recording
+#   "message":  caption rendered in italics above the audio players
+# Entries whose files are missing on disk are simply shown without a player
+# (see get_audio_path, which returns None for non-existent paths).
+SONGS_CONFIG = [
+    {"year": 2017, "file": "outputs/爱的故事上集-孙耀威_cloned.wav",
+     "original": "songs/爱的故事上集-孙耀威.mp3",
+     "message": "星的光点点洒于午夜，我们的故事，从这一年开始书写 💕"},
+    {"year": 2018, "file": "outputs/周杰伦 - 告白气球_cloned.wav",
+     "original": "songs/周杰伦 - 告白气球.mp3",
+     "message": "你说你有点难追，想让我知难而退。我没有退，这一年，我们更近了 ❤️"},
+    {"year": 2019, "file": "outputs/林俊杰 - 修炼爱情_cloned.wav",
+     "original": "songs/林俊杰 - 修炼爱情.mp3",
+     "message": "爱情需要修炼，每一年的陪伴，都是我们爱情的见证 🌟"},
+    {"year": 2020, "file": "outputs/周深-雪落下的声音_cloned.wav",
+     "original": "songs/周深-雪落下的声音.mp3",
+     "message": "就像雪花轻轻落下，你已经填满我的心 🎨"},
+    {"year": 2021, "file": "outputs/胡夏&郁可唯-知否知否_cloned.wav",
+     "original": "songs/胡夏&郁可唯-知否知否.mp3",
+     "message": "知否知否，时光荏苒，但我们的爱依然如初 💖"},
+    {"year": 2022, "file": "outputs/陈奕迅 - 陪你度过漫长岁月_cloned.wav",
+     "original": "songs/陈奕迅 - 陪你度过漫长岁月.mp3",
+     "message": "陪你把独自孤单，变成了勇敢 🌸"},
+    {"year": 2023, "file": "outputs/Edd_Sheeran_-_Perfect_cloned.wav",
+     "original": "songs/Edd_Sheeran_-_Perfect.mp3",
+     "message": "Baby, you're perfect in my eyes ✨"},
+    {"year": 2024, "file": "outputs/Michael_Learns_To_Rock_-_Take_Me_To_Your_Heart_Original_Version_cloned.wav",
+     "original": "songs/Michael_Learns_To_Rock_-_Take_Me_To_Your_Heart_Original_Version.mp3",
+     "message": "Take me to your heart, take me to your soul 🏠"},
+    {"year": 2025, "file": "outputs/Richard_Marx-Right_here_waiting_for_you_(mp3.pm)_cloned.wav",
+     "original": "songs/Richard_Marx-Right_here_waiting_for_you_(mp3.pm).mp3",
+     "message": "I will be right here waiting for you. 9年了，爱依然如故 💝"},
+]
+def get_audio_path(song, version="cloned"):
+    """Resolve one SONGS_CONFIG entry to an existing audio file.
+
+    Args:
+        song: Mapping holding the relative paths under "file" and "original".
+        version: "cloned" selects the "file" entry; any other value selects
+            the "original" entry.
+
+    Returns:
+        The path joined onto PROJECT_ROOT as a string, or None when that file
+        does not exist.
+    """
+    field = "original"
+    if version == "cloned":
+        field = "file"
+    candidate = PROJECT_ROOT / song[field]
+    if not candidate.exists():
+        return None
+    return str(candidate)
+@spaces.GPU(duration=300)
+def convert_voice(audio_file, progress=gr.Progress()):
+    """Run the upload-tab pipeline: separate, convert the vocal, then re-mix.
+
+    Steps:
+      1. Split the upload into vocals and instrumental. If the separator
+         returns no vocal path, the whole upload is converted instead.
+      2. Convert the vocal with the first model checkpoint found under
+         PROJECT_ROOT / "models"; with no checkpoint the vocal is copied
+         through unchanged.
+      3. Merge with the instrumental when one exists, otherwise run
+         optimize_audio on the converted vocal alone.
+
+    Args:
+        audio_file: Filesystem path of the uploaded audio, or None when
+            nothing was uploaded.
+        progress: Gradio progress reporter, called with a fraction and a
+            description after each stage.
+
+    Returns:
+        A ``(result_path, status_message)`` pair. ``result_path`` is None when
+        no file was uploaded or the voice conversion produced no output.
+    """
+    if audio_file is None:
+        return None, "❌ 请上传一个音频文件"
+    progress(0.05, desc="🎵 开始处理...")
+    # All intermediates live in a temp dir that is removed when the block
+    # exits; only the final result is copied out to outputs/.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        input_path = Path(audio_file)
+        progress(0.1, desc="步骤1: 读谱 - 分离人声和伴奏...")
+        vocals_path, instrumental_path = separate_vocals_and_instrumental(input_path, tmpdir)
+        if vocals_path is None:
+            progress(0.3, desc="⚠️ 跳过分离，直接转换...")
+            target_audio = input_path
+            instrumental_path = None
+        else:
+            progress(0.4, desc="✅ 人声分离完成")
+            target_audio = vocals_path
+        progress(0.5, desc="步骤2: 清嗓子 - 声线转换...")
+        converted_vocals = tmpdir / "converted.wav"
+        model_dir = PROJECT_ROOT / "models"
+        # Checkpoint names in priority order; the first one on disk wins.
+        candidates = (
+            model_dir / f"{name}.pth"
+            for name in ("xiujia-1220-best", "xiujia-best", "xiujia")
+        )
+        model_path = next((path for path in candidates if path.exists()), None)
+        if model_path is not None:
+            converted_ok = rvc_convert(str(target_audio), str(converted_vocals), str(model_path))
+            # rvc_convert reports failure through its return value instead of
+            # raising; without this check a failed run would surface later as
+            # a missing-file error in the merge step.
+            if converted_ok is False or not converted_vocals.exists():
+                return None, "❌ 声线转换失败，请稍后重试"
+        else:
+            shutil.copy(target_audio, converted_vocals)
+            progress(0.7, desc="⚠️ 未找到模型，使用原音")
+        progress(0.8, desc="✅ 声线转换完成")
+        progress(0.85, desc="步骤3: 开唱 - 合成音频...")
+        final_output = tmpdir / "final.wav"
+        # Path(...) accepts both str and Path, so this works whichever type
+        # the separator returns.
+        if instrumental_path and Path(instrumental_path).exists():
+            merge_vocals_and_instrumental(converted_vocals, instrumental_path, final_output)
+        else:
+            optimize_audio(converted_vocals, final_output)
+        # Date plus microseconds keeps results from overwriting each other
+        # when two conversions finish in the same second or on different days.
+        result_name = f"converted_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.wav"
+        result_path = PROJECT_ROOT / "outputs" / result_name
+        result_path.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy(final_output, result_path)
+        progress(1.0, desc="✅ 完成!")
+        return str(result_path), "🎉 转换成功！听听看吧~"
+# Custom stylesheet passed to gr.Blocks: pink gradient page background and
+# centered pink headings.
+css = """
+.gradio-container { background: linear-gradient(135deg, #ffeef8, #fff0f5, #ffeef8) !important; }
+h1, h2, h3 { color: #d63384 !important; text-align: center; }
+"""
+# Page layout, built once at import time: header, optional photo row,
+# two tabs (pre-rendered songs / upload-and-convert), footer, photo row.
+with gr.Blocks(title="💕 9周年纪念", theme=gr.themes.Soft(primary_hue="pink"), css=css) as demo:
+    gr.Markdown("# 💕 9th Anniversary Celebration 💕\n### 2017 - 2025 · 九年，久远")
+    # Header photos are optional: an image is added only if the file exists.
+    with gr.Row():
+        for img_name in ["couple.png", "couple1.png"]:
+            img_path = PROJECT_ROOT / img_name
+            if img_path.exists():
+                gr.Image(str(img_path), show_label=False, height=220, container=False)
+    # Tab 1: one collapsed accordion per SONGS_CONFIG entry.
+    with gr.Tab("🎵 九年歌曲集"):
+        gr.Markdown("## 🎵 九年，唱不尽的爱")
+        for song in SONGS_CONFIG:
+            with gr.Accordion(f"💗 {song['year']} 年", open=False):
+                gr.Markdown(f"*{song['message']}*")
+                with gr.Row():
+                    # get_audio_path returns None for missing files, in which
+                    # case the corresponding player is omitted.
+                    cloned = get_audio_path(song, "cloned")
+                    original = get_audio_path(song, "original")
+                    if cloned:
+                        gr.Audio(cloned, label="🎤 老公唱")
+                    if original:
+                        gr.Audio(original, label="🎵 原唱")
+    # Tab 2: upload a file and run it through convert_voice.
+    with gr.Tab("🎤 上传歌曲"):
+        gr.Markdown("## 🎤 上传MP3，我唱给你听！")
+        with gr.Row():
+            with gr.Column():
+                # type="filepath" hands convert_voice a path string.
+                audio_in = gr.Audio(label="选择歌曲 🎵", type="filepath", sources=["upload"])
+                btn = gr.Button("✨ 开始转换", variant="primary", size="lg")
+                status = gr.Textbox(label="状态", interactive=False)
+            with gr.Column():
+                audio_out = gr.Audio(label="🎵 老公开唱", type="filepath")
+        # convert_voice returns (result_path, status_message), matching the
+        # order of the two output components.
+        btn.click(convert_voice, [audio_in], [audio_out, status])
+    gr.Markdown("---\n## 💝 九年不是终点，而是我们故事的第九章 💝")
+    # Footer photos, optional in the same way as the header photos.
+    with gr.Row():
+        for img_name in ["family.png", "family2.png"]:
+            img_path = PROJECT_ROOT / img_name
+            if img_path.exists():
+                gr.Image(str(img_path), show_label=False, height=220, container=False)
+# Start the Gradio server only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

models/requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+spaces>=0.19.0
+torch>=2.0.0
+torchaudio
+demucs
+numpy
+scipy
+pydub
+soundfile
+librosa
+pyworld
+gradio
+huggingface_hub==0.22.2

models/rvc_infer.py ADDED Viewed

	@@ -0,0 +1,140 @@

+# rvc_infer.py - RVC inference for Hugging Face Spaces
+"""
+Simplified RVC (Retrieval-based Voice Conversion) inference
+Works with ZeroGPU on Hugging Face Spaces
+"""
+import os
+import sys
+import torch
+import numpy as np
+import soundfile as sf
+from pathlib import Path
+import traceback
+def rvc_convert(
+    input_path: str,
+    output_path: str,
+    model_path: str,
+    index_path: str = None,
+    f0_method: str = "harvest",
+    f0_up_key: int = 0,
+    index_rate: float = 0.75,
+):
+    """
+    Re-synthesize a vocal track with pyworld, optionally pitch-shifted.
+
+    NOTE: despite the name, this simplified implementation never loads the
+    checkpoint at ``model_path``; it only checks that the file exists. The
+    audio is analysed (f0, spectral envelope, aperiodicity) and synthesized
+    again, so with ``f0_up_key == 0`` the pitch contour is unchanged.
+
+    Any failure in that pipeline is caught and the input file is copied to
+    ``output_path`` unchanged instead.
+
+    Args:
+        input_path: Input audio file
+        output_path: Output audio file (written as 44.1 kHz audio on success)
+        model_path: Path to .pth model file; must exist, otherwise the
+            fallback copy is used
+        index_path: Path to .index file (optional, currently unused)
+        f0_method: Pitch extraction method; "harvest" uses pw.harvest, any
+            other value uses pw.dio followed by pw.stonemask
+        f0_up_key: Pitch shift in semitones
+        index_rate: Index influence rate (currently unused)
+    Returns:
+        bool: True when an output file was written (converted audio or the
+        fallback copy), False when the fallback copy failed as well
+    """
+    try:
+        # Imported here rather than at module level, so a missing package is
+        # caught below and handled by the fallback copy.
+        import pyworld as pw
+        import librosa
+        print(f"🎤 RVC Conversion starting...")
+        print(f"   Input: {input_path}")
+        print(f"   Model: {model_path}")
+        # Check if model exists
+        if not Path(model_path).exists():
+            raise FileNotFoundError(f"Model not found: {model_path}")
+        # Load audio. mono=True is stated explicitly: the result is always
+        # 1-D, so no manual channel averaging is needed afterwards.
+        audio, sr = librosa.load(input_path, sr=None, mono=True)
+        # Resample to 16kHz if needed
+        if sr != 16000:
+            audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
+            sr = 16000
+        print(f"   Audio: {len(audio)/sr:.2f}s @ {sr}Hz")
+        # Convert to float64 for pyworld
+        audio_f64 = audio.astype(np.float64)
+        # Extract features using pyworld
+        print(f"   Extracting pitch ({f0_method})...")
+        if f0_method == "harvest":
+            f0, t = pw.harvest(audio_f64, sr, frame_period=10)
+        else:
+            f0, t = pw.dio(audio_f64, sr, frame_period=10)
+            f0 = pw.stonemask(audio_f64, f0, t, sr)
+        sp = pw.cheaptrick(audio_f64, f0, t, sr)
+        ap = pw.d4c(audio_f64, f0, t, sr)
+        # Apply pitch shift: one semitone is a factor of 2 ** (1 / 12)
+        if f0_up_key != 0:
+            print(f"   Applying pitch shift: {f0_up_key} semitones")
+            f0 = f0 * (2 ** (f0_up_key / 12))
+        # Synthesize
+        print(f"   Synthesizing...")
+        output_audio = pw.synthesize(f0, sp, ap, sr)
+        output_audio = output_audio.astype(np.float32)
+        # Normalize to a 0.95 peak; skipped for all-zero output to avoid
+        # dividing by zero
+        max_val = np.abs(output_audio).max()
+        if max_val > 0:
+            output_audio = output_audio / max_val * 0.95
+        # Resample back to 44100 for output
+        output_audio = librosa.resample(output_audio, orig_sr=sr, target_sr=44100)
+        # Save
+        out_file = Path(output_path)
+        out_file.parent.mkdir(parents=True, exist_ok=True)
+        sf.write(str(out_file), output_audio, 44100)
+        print(f"   ✅ Conversion complete!")
+        return True
+    except Exception as e:
+        print(f"   ❌ RVC failed: {e}")
+        traceback.print_exc()
+        # Fallback: copy input to output
+        try:
+            import shutil
+            # The failure may have happened before the output directory was
+            # created, so make sure it exists before copying.
+            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy(input_path, output_path)
+            print(f"   ⚠️ Fallback: using original audio")
+            return True
+        except Exception as copy_err:
+            # Exception rather than a bare except, so KeyboardInterrupt and
+            # SystemExit still propagate.
+            print(f"   ❌ Fallback copy failed: {copy_err}")
+            return False
+if __name__ == "__main__":
+    # Command-line entry point: parse the flags, run one conversion, and
+    # exit with status 0 on success or 1 on failure.
+    import argparse
+    cli = argparse.ArgumentParser()
+    for required_flag in ("--input_path", "--output_path", "--model_path"):
+        cli.add_argument(required_flag, required=True)
+    cli.add_argument("--index_path", default=None)
+    cli.add_argument("--f0_method", default="harvest")
+    cli.add_argument("--f0_up_key", type=int, default=0)
+    opts = cli.parse_args()
+    ok = rvc_convert(
+        input_path=opts.input_path,
+        output_path=opts.output_path,
+        model_path=opts.model_path,
+        index_path=opts.index_path,
+        f0_method=opts.f0_method,
+        f0_up_key=opts.f0_up_key,
+    )
+    sys.exit(0 if ok else 1)