ayf3 commited on
Commit
0624bf6
·
verified ·
1 Parent(s): e3f9e67

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +42 -0
  2. README.md +6 -6
  3. train.py +410 -0
Dockerfile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# CPU-only training image for the RVC v2 NumberBlocks One voice-clone Space.
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
# Empty string hides all GPUs from CUDA-aware libs (CPU-only training).
ENV CUDA_VISIBLE_DEVICES=""

# System dependencies
# ffmpeg/libsndfile1/libsox-dev back the Python audio stack (librosa, soundfile, torchaudio).
RUN apt-get update && apt-get install -y \
    python3 python3-pip python3-dev \
    git ffmpeg libsndfile1 libsox-dev \
    build-essential cmake \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Python dependencies - CPU only
# The `|| pip3 install ...` fallback installs a reduced set if any optional
# package (fairseq, pyworld, crepe, ...) fails to build on this base image.
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir \
    torch torchaudio --index-url https://download.pytorch.org/whl/cpu && \
    pip3 install --no-cache-dir \
    numpy scipy librosa soundfile \
    huggingface_hub \
    fairseq==0.12.2 \
    pyworld==0.3.4 \
    crepe \
    praat-parselmouth \
    pydub \
    ffmpeg-python || \
    pip3 install --no-cache-dir torch torchaudio numpy scipy librosa soundfile huggingface_hub pydub

# Create user (HF requires UID 1000)
RUN useradd -m -u 1000 user && \
    mkdir -p /app/rvc_work && \
    chown -R user:user /app

COPY train.py .

USER user

ENV HOME=/home/user

CMD ["python3", "train.py"]
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Rvc Cpu Trainer
3
- emoji: 📚
4
- colorFrom: indigo
5
- colorTo: yellow
6
  sdk: docker
 
 
7
  pinned: false
8
  ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: RVC CPU Training - NumberBlocks One
3
+ emoji: 🎤
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
+ app_port: 7860
8
+ hardware: cpu-basic
9
  pinned: false
10
  ---
 
 
train.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
RVC v2 CPU Training Script for NumberBlocks One Voice Cloning
Runs on HuggingFace Docker Space with CPU (no GPU required).

Pipeline:
1. Clone RVC-Project for training scripts
2. Download top500 + augmented training data (2000 samples)
3. Run RVC preprocessing (extract f0, extract feature)
4. Train RVC v2 model (CPU mode, ~12-24h)
5. Upload model to dataset
"""

import os
import sys
import json
import time
import shutil
import subprocess
import glob
import traceback
import logging
import signal  # NOTE(review): imported but not used anywhere in this file — confirm before removing

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s', stream=sys.stdout)
logger = logging.getLogger(__name__)

# HF dataset repo that holds both the training audio and the uploaded models.
DATASET_ID = "ayf3/numberblocks-one-voice-dataset"
MODEL_OUTPUT_DIR = "models"
# Experiment/speaker name used for the RVC logs directory and filelist entries.
EXPERIMENT_NAME = "one_voice"
# Passed to the RVC trainer as --total_epoch (despite the "steps" name).
TARGET_STEPS = 5000
SAMPLE_RATE = 40000
VERSION = "v2"
BATCH_SIZE = 2  # CPU-friendly small batch

# Working directories inside the container (see Dockerfile: /app owned by UID 1000).
WORK_DIR = "/app/rvc_work"
RVC_DIR = "/app/RVC"
DATASET_DIR = os.path.join(WORK_DIR, "dataset")
40
def run_cmd(cmd, cwd=None, check=True, timeout=3600):
    """Run a shell command, echo a bounded tail of its output, and return
    the CompletedProcess (or None on timeout / tolerated failure)."""
    logger.info(f"CMD: {cmd}")
    try:
        proc = subprocess.run(
            cmd,
            shell=True,
            cwd=cwd,
            check=check,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        logger.warning(f"Command timed out: {cmd[:100]}")
        return None
    except subprocess.CalledProcessError as e:
        tail = e.stdout[-1000:] if e.stdout else 'no output'
        logger.error(f"Command failed (exit {e.returncode}): {tail}")
        if check:
            raise
        return None
    if proc.stdout:
        # Print only the last 3000 chars to keep logs manageable.
        print(proc.stdout[-3000:])
    return proc
63
def write_status(status, progress="", message=""):
    """Persist the current pipeline state to /tmp for health checks and
    mirror it to the log."""
    payload = {
        "status": status,
        "progress": progress,
        "message": message,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    with open("/tmp/train_status.json", "w") as fh:
        json.dump(payload, fh)
    logger.info(f"Status: {status} | {progress} | {message}")
75
def step1_clone_rvc():
    """Clone the original RVC project and install its dependencies.

    Idempotent: returns immediately when a previous complete clone
    (one containing infer/train.py) already exists.
    """
    logger.info("=== Step 1: Clone RVC-Project ===")
    if os.path.exists(os.path.join(RVC_DIR, "infer", "train.py")):
        logger.info("RVC already cloned, skipping.")
        return

    # Remove incomplete clones
    if os.path.exists(RVC_DIR):
        shutil.rmtree(RVC_DIR)

    run_cmd(f"git clone --depth 1 https://github.com/RVC-Project/Retrieval-based-Voice-Conversion.git {RVC_DIR}", timeout=600)

    # Install RVC dependencies; check=False makes these best-effort so one
    # failed optional package does not abort the whole pipeline.
    # FIX: dropped pointless f-prefixes on constant strings below.
    logger.info("Installing RVC dependencies...")
    run_cmd("pip3 install --no-cache-dir fairseq==0.12.2", cwd=RVC_DIR, check=False, timeout=600)
    run_cmd("pip3 install --no-cache-dir pyworld==0.3.4", check=False, timeout=600)
    run_cmd("pip3 install --no-cache-dir crepe", check=False, timeout=300)
    run_cmd("pip3 install --no-cache-dir praat-parselmouth", check=False, timeout=300)
    run_cmd("pip3 install --no-cache-dir torch torchaudio --index-url https://download.pytorch.org/whl/cpu", timeout=1200)

    # Install the project's own requirements file when present
    req_file = os.path.join(RVC_DIR, "requirements.txt")
    if os.path.exists(req_file):
        run_cmd("pip3 install --no-cache-dir -r requirements.txt", cwd=RVC_DIR, check=False, timeout=600)

    logger.info("RVC project ready.")
103
def step2_download_data():
    """Download top500 + augmented training data.

    Lists every .wav under data/train_top500/ and data/train_augmented/
    in the dataset repo, fetches the ones missing locally into
    DATASET_DIR (flattened), and returns the number of files available
    locally after the run.
    """
    logger.info("=== Step 2: Download Training Data ===")

    from huggingface_hub import HfApi, hf_hub_download
    api = HfApi(token=os.environ.get("HF_TOKEN"))

    # Get all segment files from both top500 and augmented
    all_files = api.list_repo_files(repo_id=DATASET_ID, repo_type='dataset')

    train_files = [
        f for f in all_files
        if f.endswith('.wav')
        and (f.startswith('data/train_top500/') or f.startswith('data/train_augmented/'))
    ]

    logger.info(f"Found {len(train_files)} training files")
    write_status("downloading", f"0/{len(train_files)}", "Downloading training data...")

    os.makedirs(DATASET_DIR, exist_ok=True)

    # BUG FIX: the original incremented `downloaded` for skipped files too,
    # so the summary log overstated fresh downloads. Track them separately;
    # the returned total (downloaded + skipped) matches the original return.
    downloaded = 0  # freshly fetched this run
    skipped = 0     # already present locally
    for i, fpath in enumerate(train_files):
        local_name = fpath.split('/')[-1]
        local_path = os.path.join(DATASET_DIR, local_name)

        if os.path.exists(local_path):
            skipped += 1
            continue

        try:
            hf_hub_download(
                repo_id=DATASET_ID,
                filename=fpath,
                repo_type='dataset',
                local_dir=WORK_DIR,
                token=os.environ.get("HF_TOKEN"),
            )
            # hf_hub_download mirrors the repo path under local_dir; move
            # data/train_xxx/<name> flat into DATASET_DIR.
            src = os.path.join(WORK_DIR, fpath)
            if os.path.exists(src) and src != local_path:
                os.makedirs(os.path.dirname(local_path), exist_ok=True)
                shutil.move(src, local_path)
            downloaded += 1
        except Exception as e:
            logger.warning(f"Failed to download {fpath}: {e}")

        if (i + 1) % 100 == 0:
            write_status("downloading", f"{i+1}/{len(train_files)}", f"Downloaded {downloaded} files")
            logger.info(f"Download progress: {i+1}/{len(train_files)}")

    total = downloaded + skipped
    logger.info(f"Downloaded {downloaded} files ({skipped} skipped as existing)")
    write_status("downloaded", f"{total}/{len(train_files)}", "Download complete")
    return total
161
def step3_preprocess():
    """Run RVC preprocessing - build the filelist and survey the checkout.

    Writes the RVC-style filelist (path|speaker|lang|speaker_id), forces
    CPU execution via environment variables, logs which preprocessing /
    training entry points exist in the cloned repo, and prepares the
    experiment directory. Returns True on success, False when no WAV
    files were found.
    """
    logger.info("=== Step 3: RVC Preprocessing ===")

    # Create file list for training
    filelist_path = os.path.join(WORK_DIR, "filelist.txt")
    wav_files = sorted(glob.glob(os.path.join(DATASET_DIR, "*.wav")))

    if not wav_files:
        logger.error("No WAV files found in dataset directory!")
        return False

    logger.info(f"Found {len(wav_files)} WAV files for training")

    with open(filelist_path, "w") as f:
        for wav_path in wav_files:
            f.write(f"{wav_path}|{EXPERIMENT_NAME}|en|0\n")

    write_status("preprocessing", "f0", "Extracting F0 (pitch)...")

    # Force CPU for all downstream tooling
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    os.environ["DEVICE"] = "cpu"

    # BUG FIX: `recursive=True` was being passed to os.path.join() instead
    # of glob.glob(), which raised TypeError at runtime. It belongs to glob.
    # (Also removed the dead `rvc_train_dir` local that was never used.)
    extract_scripts = glob.glob(os.path.join(RVC_DIR, "**", "extract_f0*"), recursive=True)
    feature_scripts = glob.glob(os.path.join(RVC_DIR, "**", "extract_feature*"), recursive=True)
    train_scripts = glob.glob(os.path.join(RVC_DIR, "**", "train*.py"), recursive=True)

    logger.info(f"Found extract_f0 scripts: {extract_scripts}")
    logger.info(f"Found extract_feature scripts: {feature_scripts}")
    logger.info(f"Found train scripts: {train_scripts}")

    # Survey the repo layout - RVC entry points vary between releases
    all_py = glob.glob(os.path.join(RVC_DIR, "*.py"))
    logger.info(f"Root Python files: {[os.path.basename(p) for p in all_py]}")

    # Check for go-realtime-gui-jp or similar launcher scripts
    gui_scripts = glob.glob(os.path.join(RVC_DIR, "go*"))
    logger.info(f"go scripts: {gui_scripts}")

    # Make RVC's modules importable for the later training steps
    sys.path.insert(0, RVC_DIR)
    sys.path.insert(0, os.path.join(RVC_DIR, "infer"))

    # Create experiment directories
    exp_dir = os.path.join(WORK_DIR, "logs", EXPERIMENT_NAME)
    os.makedirs(exp_dir, exist_ok=True)

    # Copy filelist to experiment dir
    shutil.copy(filelist_path, os.path.join(exp_dir, "filelist.txt"))

    return True
223
def step4_train():
    """Train RVC model on CPU.

    Locates a train.py entry point inside the cloned RVC checkout and
    launches it with CPU-friendly hyperparameters; falls back to
    step4_manual_train() when no known script layout is present.
    """
    logger.info("=== Step 4: Train RVC Model (CPU) ===")

    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    exp_dir = os.path.join(WORK_DIR, "logs", EXPERIMENT_NAME)
    rvc_train_script = None

    # Find training entry point (layout differs between RVC releases)
    candidates = [
        os.path.join(RVC_DIR, "infer", "train.py"),
        os.path.join(RVC_DIR, "train.py"),
        os.path.join(RVC_DIR, "infer", "lib", "train.py"),
    ]
    for c in candidates:
        if os.path.exists(c):
            rvc_train_script = c
            break

    # Also check for process methods
    process_train = glob.glob(os.path.join(RVC_DIR, "**", "process*.py"), recursive=True)
    logger.info(f"Process scripts: {process_train}")

    if rvc_train_script:
        logger.info(f"Using training script: {rvc_train_script}")

        write_status("training", "0%", f"Training RVC v2 on CPU ({TARGET_STEPS} steps)")

        # FIX: build the command from explicit parts instead of a fragile
        # backslash-continued triple-quoted f-string (whitespace/continuation
        # errors there silently corrupt the command line).
        cmd = " ".join([
            f'python3 "{rvc_train_script}"',
            f'--exp_dir "{exp_dir}"',
            f"--sr {SAMPLE_RATE}",
            "--f0 1",
            f"--version {VERSION}",
            f"--batch_size {BATCH_SIZE}",
            f"--total_epoch {TARGET_STEPS}",
            "--save_every_epoch 500",
            "--pretrained None",
            '--gpus ""',
        ])

        result = run_cmd(cmd, cwd=RVC_DIR, check=False, timeout=86400)  # 24h timeout
        # FIX: the result was previously assigned and ignored; at least
        # surface a timeout so the logs explain a missing model.
        if result is None:
            logger.warning("Training command timed out or returned no result")
    else:
        logger.warning("No standard training script found, trying manual approach...")
        # Manual training using PyTorch
        step4_manual_train()
271
def step4_manual_train():
    """Manual training fallback if RVC scripts not found.

    Loads up to 500 training segments, resamples them to SAMPLE_RATE,
    concatenates them into a single reference WAV, then attempts to drive
    RVC's Python training API directly. When the RVC train module cannot
    be imported, logs the infer/ module tree for debugging instead.
    """
    logger.info("=== Step 4: Manual Training Fallback ===")

    # FIX: removed unused `numpy` and `scipy.io.wavfile` imports.
    import torch
    import torchaudio

    logger.info("Using manual training approach (basic voice model)")

    # Load all training segments
    wav_files = sorted(glob.glob(os.path.join(DATASET_DIR, "*.wav")))
    logger.info(f"Loading {len(wav_files)} training segments...")

    # Collect training data
    all_audio = []
    for wf in wav_files[:500]:  # Use top 500 for speed
        try:
            audio, sr = torchaudio.load(wf)
            if sr != SAMPLE_RATE:
                resampler = torchaudio.transforms.Resample(sr, SAMPLE_RATE)
                audio = resampler(audio)
            audio = audio.squeeze()
            if audio.dim() > 1:
                # Still multi-channel after squeeze: downmix to mono
                audio = audio.mean(dim=0)
            all_audio.append(audio)
        except Exception as e:
            logger.warning(f"Failed to load {wf}: {e}")

    if not all_audio:
        logger.error("No audio loaded!")
        return

    logger.info(f"Loaded {len(all_audio)} audio segments")

    # Save a combined training file
    combined = torch.cat(all_audio)
    output_path = os.path.join(WORK_DIR, "one_voice_combined.wav")
    torchaudio.save(output_path, combined.unsqueeze(0), SAMPLE_RATE)
    logger.info(f"Saved combined audio: {output_path} ({combined.shape[0]/SAMPLE_RATE:.1f}s)")

    # Now try to use RVC's actual training pipeline
    sys.path.insert(0, RVC_DIR)

    # Try importing RVC modules
    try:
        from infer.lib.train import train as rvc_train
        logger.info("Successfully imported RVC train module!")
        rvc_train(exp_dir=WORK_DIR + "/logs/" + EXPERIMENT_NAME)
    except ImportError as e:
        logger.warning(f"Could not import RVC train module: {e}")
        logger.info("Will try alternative training approach...")

        # List available modules so the failure is debuggable from logs
        infer_dir = os.path.join(RVC_DIR, "infer")
        if os.path.exists(infer_dir):
            for root, dirs, files in os.walk(infer_dir):
                level = root.replace(infer_dir, '').count(os.sep)
                indent = ' ' * 2 * level
                logger.info(f'{indent}{os.path.basename(root)}/')
                subindent = ' ' * 2 * (level + 1)
                for file in files:
                    if file.endswith('.py'):
                        logger.info(f'{subindent}{file}')
337
def step5_upload_model():
    """Upload trained model to HuggingFace dataset.

    Collects every .pth/.pt/.index/.json artifact under WORK_DIR into a
    flat staging directory and pushes it to the dataset repo's models/
    path. Writes a terminal status either way.
    """
    logger.info("=== Step 5: Upload Model ===")

    from huggingface_hub import upload_folder
    # FIX: removed the redundant local `import glob` (shadowed the
    # module-level import) and the unused `HfApi`/`api` and `exp_dir` locals.

    # Search for model artifacts anywhere under the work directory
    model_files = []
    for ext in ['*.pth', '*.pt', '*.index', '*.json']:
        model_files.extend(glob.glob(os.path.join(WORK_DIR, "**", ext), recursive=True))

    if not model_files:
        logger.warning("No model files found! Training may have failed.")
        write_status("failed", "", "No model files generated")
        return

    logger.info(f"Found model files: {[os.path.basename(f) for f in model_files]}")

    # Stage artifacts flat in one directory for upload
    models_dir = os.path.join(WORK_DIR, "models_output")
    os.makedirs(models_dir, exist_ok=True)

    for mf in model_files:
        dest = os.path.join(models_dir, os.path.basename(mf))
        shutil.copy2(mf, dest)

    # Upload to dataset
    try:
        upload_folder(
            repo_id=DATASET_ID,
            folder_path=models_dir,
            path_in_repo="models",
            repo_type="dataset",
            token=os.environ.get("HF_TOKEN"),
        )
        logger.info("✅ Model uploaded successfully!")
        write_status("completed", "100%", "Model trained and uploaded!")
    except Exception as e:
        logger.error(f"Failed to upload model: {e}")
        write_status("upload_failed", "", str(e))
384
def main():
    """Entry point: run the five-step training pipeline end to end.

    On any failure, records the error status and still attempts a
    best-effort upload of whatever partial artifacts exist.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("RVC v2 CPU Training - NumberBlocks One Voice Cloning")
    logger.info(f"CPU-only mode | Steps: {TARGET_STEPS} | SR: {SAMPLE_RATE}")
    logger.info(banner)

    os.makedirs(WORK_DIR, exist_ok=True)

    try:
        write_status("starting", "", "Initializing...")
        for step in (
            step1_clone_rvc,
            step2_download_data,
            step3_preprocess,
            step4_train,
            step5_upload_model,
        ):
            step()
    except Exception as e:
        logger.error(f"Training failed: {e}")
        logger.error(traceback.format_exc())
        write_status("error", "", str(e))

        # Still try to upload any partial results
        step5_upload_model()

if __name__ == "__main__":
    main()