invokerx committed on
Commit
e5e481c
·
verified ·
1 Parent(s): 685ed67

Upload 3 files

Browse files
Files changed (3) hide show
  1. models/app.py +158 -0
  2. models/requirements.txt +12 -0
  3. models/rvc_infer.py +140 -0
models/app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py - 9th Anniversary Celebration App
2
+ import gradio as gr
3
+ import spaces
4
+ import os
5
+ import tempfile
6
+ import shutil
7
+ from pathlib import Path
8
+ from datetime import datetime
9
+
10
+ from utils import (
11
+ separate_vocals_and_instrumental,
12
+ merge_vocals_and_instrumental,
13
+ optimize_audio,
14
+ )
15
+ from rvc_infer import rvc_convert
16
+
17
# Directory containing this file; every asset path below is relative to it.
PROJECT_ROOT = Path(__file__).parent

# One row per anniversary year: (year, audio file stem, dedication message).
# The cloned rendition and the original track share the same stem, so both
# relative paths are derived from it when SONGS_CONFIG is built.
_SONG_ROWS = (
    (2017, "爱的故事上集-孙耀威",
     "星的光点点洒于午夜,我们的故事,从这一年开始书写 💕"),
    (2018, "周杰伦 - 告白气球",
     "你说你有点难追,想让我知难而退。我没有退,这一年,我们更近了 ❤️"),
    (2019, "林俊杰 - 修炼爱情",
     "爱情需要修炼,每一年的陪伴,都是我们爱情的见证 🌟"),
    (2020, "周深-雪落下的声音",
     "就像雪花轻轻落下,你已经填满我的心 🎨"),
    (2021, "胡夏&郁可唯-知否知否",
     "知否知否,时光荏苒,但我们的爱依然如初 💖"),
    (2022, "陈奕迅 - 陪你度过漫长岁月",
     "陪你把独自孤单,变成了勇敢 🌸"),
    (2023, "Edd_Sheeran_-_Perfect",
     "Baby, you're perfect in my eyes ✨"),
    (2024, "Michael_Learns_To_Rock_-_Take_Me_To_Your_Heart_Original_Version",
     "Take me to your heart, take me to your soul 🏠"),
    (2025, "Richard_Marx-Right_here_waiting_for_you_(mp3.pm)",
     "I will be right here waiting for you. 9年了,爱依然如故 💝"),
)

# Song catalogue shown in the UI. Each entry carries the year, the relative
# path of the cloned rendition ("file"), the relative path of the original
# track ("original") and the message displayed alongside the players.
SONGS_CONFIG = [
    {
        "year": year,
        "file": f"outputs/{stem}_cloned.wav",
        "original": f"songs/{stem}.mp3",
        "message": message,
    }
    for year, stem, message in _SONG_ROWS
]
48
+
49
def get_audio_path(song, version="cloned"):
    """Return the absolute path of a song's audio file, or None if it is missing.

    Args:
        song: One SONGS_CONFIG-style dict with "file" and "original" keys
            holding paths relative to PROJECT_ROOT.
        version: "cloned" selects the "file" entry; any other value selects
            the "original" entry.

    Returns:
        The path as a string when the file exists on disk, otherwise None.
    """
    if version == "cloned":
        relative = song["file"]
    else:
        relative = song["original"]

    candidate = PROJECT_ROOT / relative
    if not candidate.exists():
        return None
    return str(candidate)
53
+
54
@spaces.GPU(duration=300)
def convert_voice(audio_file, progress=gr.Progress()):
    """Run the upload -> separate -> convert -> merge pipeline for one song.

    Steps:
      1. Split the upload into vocals and instrumental. When the separator
         returns no vocals path, the whole upload is converted instead.
      2. Convert the vocals with the first model file found under
         PROJECT_ROOT / "models"; with no model the audio is passed through.
      3. Merge the converted vocals with the instrumental when one exists,
         otherwise post-process the vocals on their own.
      4. Copy the result to a uniquely named file in PROJECT_ROOT / "outputs",
         because the working directory is deleted on return.

    Args:
        audio_file: Filesystem path of the uploaded audio, or None when
            nothing was uploaded.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        (result_path, status_message). result_path is None when no file was
        uploaded or when the voice conversion step reported failure.
    """
    if audio_file is None:
        return None, "❌ 请上传一个音频文件"

    progress(0.05, desc="🎵 开始处理...")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        input_path = Path(audio_file)

        progress(0.1, desc="步骤1: 读谱 - 分离人声和伴奏...")
        vocals_path, instrumental_path = separate_vocals_and_instrumental(input_path, work_dir)

        if vocals_path is None:
            progress(0.3, desc="⚠️ 跳过分离,直接转换...")
            target_audio = input_path
            instrumental_path = None
        else:
            progress(0.4, desc="✅ 人声分离完成")
            target_audio = vocals_path

        progress(0.5, desc="步骤2: 清嗓子 - 声线转换...")
        converted_vocals = work_dir / "converted.wav"

        # Candidate model names in priority order; the first existing file wins.
        model_dir = PROJECT_ROOT / "models"
        candidates = (
            model_dir / f"{name}.pth"
            for name in ["xiujia-1220-best", "xiujia-best", "xiujia"]
        )
        model_path = next((path for path in candidates if path.exists()), None)

        if model_path is not None:
            # rvc_convert reports failure through its return value. Ignoring
            # it would surface later as a confusing missing-file error in the
            # merge step, so stop here with a clear status instead.
            converted_ok = rvc_convert(str(target_audio), str(converted_vocals), str(model_path))
            if not converted_ok or not converted_vocals.exists():
                return None, "❌ 声线转换失败,请稍后重试"
        else:
            shutil.copy(target_audio, converted_vocals)
            progress(0.7, desc="⚠️ 未找到模型,使用原音")

        progress(0.8, desc="✅ 声线转换完成")
        progress(0.85, desc="步骤3: 开唱 - 合成音频...")

        final_output = work_dir / "final.wav"

        if instrumental_path and instrumental_path.exists():
            merge_vocals_and_instrumental(converted_vocals, instrumental_path, final_output)
        else:
            optimize_audio(converted_vocals, final_output)

        # A time-of-day stamp alone collides when two conversions finish in
        # the same second (or at the same clock time on another day), so let
        # mkstemp reserve a name that is guaranteed to be unique.
        output_dir = PROJECT_ROOT / "outputs"
        output_dir.mkdir(parents=True, exist_ok=True)
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        handle, result_path = tempfile.mkstemp(
            prefix=f"converted_{stamp}_", suffix=".wav", dir=str(output_dir)
        )
        os.close(handle)
        shutil.copy(final_output, result_path)

        progress(1.0, desc="✅ 完成!")
        return str(result_path), "🎉 转换成功!听听看吧~"
110
+
111
# Page-wide styling: pink gradient background and centred pink headings.
css = """
.gradio-container { background: linear-gradient(135deg, #ffeef8, #fff0f5, #ffeef8) !important; }
h1, h2, h3 { color: #d63384 !important; text-align: center; }
"""


def _render_image_row(image_names):
    """Add one row holding every listed image that exists under PROJECT_ROOT.

    Must be called inside an open gr.Blocks context; missing images are skipped.
    """
    with gr.Row():
        for image_name in image_names:
            image_file = PROJECT_ROOT / image_name
            if image_file.exists():
                gr.Image(str(image_file), show_label=False, height=220, container=False)


with gr.Blocks(title="💕 9周年纪念", theme=gr.themes.Soft(primary_hue="pink"), css=css) as demo:
    gr.Markdown("# 💕 9th Anniversary Celebration 💕\n### 2017 - 2025 · 九年,久远")

    # Header photos.
    _render_image_row(["couple.png", "couple1.png"])

    # Tab 1: one collapsed accordion per year, each with up to two players.
    with gr.Tab("🎵 九年歌曲集"):
        gr.Markdown("## 🎵 九年,唱不尽的爱")
        for entry in SONGS_CONFIG:
            with gr.Accordion(f"💗 {entry['year']} 年", open=False):
                gr.Markdown(f"*{entry['message']}*")
                with gr.Row():
                    players = (
                        (get_audio_path(entry, "cloned"), "🎤 老公唱"),
                        (get_audio_path(entry, "original"), "🎵 原唱"),
                    )
                    # A player is only rendered when its audio file exists.
                    for audio_file, caption in players:
                        if audio_file:
                            gr.Audio(audio_file, label=caption)

    # Tab 2: upload a song and run it through convert_voice.
    with gr.Tab("🎤 上传歌曲"):
        gr.Markdown("## 🎤 上传MP3,我唱给你听!")
        with gr.Row():
            with gr.Column():
                audio_in = gr.Audio(label="选择歌曲 🎵", type="filepath", sources=["upload"])
                btn = gr.Button("✨ 开始转换", variant="primary", size="lg")
                status = gr.Textbox(label="状态", interactive=False)
            with gr.Column():
                audio_out = gr.Audio(label="🎵 老公开唱", type="filepath")
        btn.click(fn=convert_voice, inputs=[audio_in], outputs=[audio_out, status])

    gr.Markdown("---\n## 💝 九年不是终点,而是我们故事的第九章 💝")

    # Footer photos.
    _render_image_row(["family.png", "family2.png"])

if __name__ == "__main__":
    demo.launch()
models/requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ spaces>=0.19.0
2
+ torch>=2.0.0
3
+ torchaudio
4
+ demucs
5
+ numpy
6
+ scipy
7
+ pydub
8
+ soundfile
9
+ librosa
10
+ pyworld
11
+ gradio
12
+ huggingface_hub==0.22.2
models/rvc_infer.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rvc_infer.py - RVC inference for Hugging Face Spaces
2
+ """
3
+ Simplified RVC (Retrieval-based Voice Conversion) inference
4
+ Works with ZeroGPU on Hugging Face Spaces
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import torch
10
+ import numpy as np
11
+ import soundfile as sf
12
+ from pathlib import Path
13
+ import traceback
14
+
15
def rvc_convert(
    input_path: str,
    output_path: str,
    model_path: str,
    index_path: str = None,
    f0_method: str = "harvest",
    f0_up_key: int = 0,
    index_rate: float = 0.75,
):
    """
    Re-synthesise a recording with an optional pitch shift and write a WAV.

    The audio is analysed at 16 kHz with the WORLD vocoder (pyworld), its
    pitch contour is optionally shifted, and the result is re-synthesised,
    resampled to 44.1 kHz, peak-normalised and written to ``output_path``.

    NOTE(review): the model file is only checked for existence; its weights
    are never loaded here, and ``index_path`` / ``index_rate`` are accepted
    for interface compatibility but currently unused.

    Args:
        input_path: Input audio file
        output_path: Output audio file (parent directories are created)
        model_path: Path to .pth model file (must exist)
        index_path: Path to .index file (optional, unused)
        f0_method: Pitch extraction method; "harvest" uses pyworld.harvest,
            any other value uses pyworld.dio refined by stonemask
        f0_up_key: Pitch shift in semitones
        index_rate: Index influence rate (unused)

    Returns:
        bool: True when an output file was written, either the converted
        audio or, after any failure, a plain copy of the input. False when
        even that fallback copy failed.
    """
    analysis_sr = 16000  # rate used for WORLD analysis/synthesis
    output_sr = 44100    # rate of the written file

    try:
        import pyworld as pw
        import librosa

        print("🎤 RVC Conversion starting...")
        print(f" Input: {input_path}")
        print(f" Model: {model_path}")

        # Check if model exists
        if not Path(model_path).exists():
            raise FileNotFoundError(f"Model not found: {model_path}")

        # Load audio. librosa down-mixes to mono itself; the request is made
        # explicit so no manual channel averaging is needed afterwards
        # (librosa lays multi-channel audio out as (channels, samples)).
        audio, sr = librosa.load(input_path, sr=None, mono=True)

        # Resample to the analysis rate if needed
        if sr != analysis_sr:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=analysis_sr)
            sr = analysis_sr

        print(f" Audio: {len(audio)/sr:.2f}s @ {sr}Hz")

        # pyworld operates on float64 samples
        audio_f64 = audio.astype(np.float64)

        # Extract pitch, spectral envelope and aperiodicity
        print(f" Extracting pitch ({f0_method})...")

        if f0_method == "harvest":
            f0, t = pw.harvest(audio_f64, sr, frame_period=10)
        else:
            f0, t = pw.dio(audio_f64, sr, frame_period=10)
            f0 = pw.stonemask(audio_f64, f0, t, sr)

        sp = pw.cheaptrick(audio_f64, f0, t, sr)
        ap = pw.d4c(audio_f64, f0, t, sr)

        # Apply pitch shift: each semitone scales f0 by 2 ** (1 / 12)
        if f0_up_key != 0:
            print(f" Applying pitch shift: {f0_up_key} semitones")
            f0 = f0 * (2 ** (f0_up_key / 12))

        # Synthesize
        print(" Synthesizing...")
        output_audio = pw.synthesize(f0, sp, ap, sr).astype(np.float32)

        # Resample to the output rate first, then normalise: resampling can
        # overshoot the previous peak, so scaling last keeps the written
        # signal within the intended 0.95 ceiling.
        output_audio = librosa.resample(output_audio, orig_sr=sr, target_sr=output_sr)
        peak = np.abs(output_audio).max()
        if peak > 0:
            output_audio = output_audio / peak * 0.95

        # Save
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        sf.write(str(destination), output_audio, output_sr)

        print(" ✅ Conversion complete!")
        return True

    except Exception as e:
        print(f" ❌ RVC failed: {e}")
        traceback.print_exc()

        # Fallback: copy input to output so the caller still gets a file.
        # Only Exception is caught (not KeyboardInterrupt/SystemExit) and the
        # reason is reported rather than swallowed silently.
        try:
            import shutil

            fallback_target = Path(output_path)
            fallback_target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(input_path, fallback_target)
            print(" ⚠️ Fallback: using original audio")
            return True
        except Exception as copy_err:
            print(f" ❌ Fallback copy failed: {copy_err}")
            return False
116
+
117
+
118
if __name__ == "__main__":
    # Command-line entry point: forwards the parsed flags to rvc_convert and
    # exits with status 0 on success, 1 on failure.
    import argparse

    cli = argparse.ArgumentParser()
    for required_flag in ("--input_path", "--output_path", "--model_path"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--index_path", default=None)
    cli.add_argument("--f0_method", default="harvest")
    cli.add_argument("--f0_up_key", type=int, default=0)

    opts = cli.parse_args()

    ok = rvc_convert(
        input_path=opts.input_path,
        output_path=opts.output_path,
        model_path=opts.model_path,
        index_path=opts.index_path,
        f0_method=opts.f0_method,
        f0_up_key=opts.f0_up_key,
    )

    sys.exit(0 if ok else 1)