Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import argparse | |
| import os | |
| import ssl | |
| import sys | |
| import tempfile | |
| import urllib.request | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Optional | |
| from gradio_client import Client, handle_file | |
| from gradio_client.client import DEFAULT_TEMP_DIR | |
| # Handle imports for both module and script usage | |
| try: | |
| from tools.audio_info import validate_audio_path | |
| except ImportError: | |
| from audio_info import validate_audio_path | |
def resolve_audio_path(audio_path: str) -> str:
    """
    Turn an audio reference into a path on the local filesystem.

    Args:
        audio_path: Either a local file path or an ``http://`` / ``https://`` URL.

    Returns:
        Local path to the audio file. URLs are handed to
        ``download_audio_from_url``; anything else is handed to
        ``validate_audio_path``.

    Raises:
        ValueError: If ``audio_path`` is empty (or otherwise falsy).
        RuntimeError: If the URL download fails.
    """
    if not audio_path:
        raise ValueError("Audio path cannot be empty")

    is_remote = audio_path.startswith(("http://", "https://"))
    # Remote references are fetched to disk; local ones are validated in place.
    resolver = download_audio_from_url if is_remote else validate_audio_path
    return resolver(audio_path)
# Socket-level timeout (seconds) so a stalled server cannot hang a download forever.
_DOWNLOAD_TIMEOUT_SECONDS = 60

# User-Agent sent with every download request.
_DOWNLOAD_USER_AGENT = "Mozilla/5.0 (compatible; Voice-Replacement-Tool/1.0)"


def download_audio_from_url(url: str, output_path: Optional[str] = None) -> str:
    """
    Download audio from URL to temporary file or specified output path.

    Three strategies are tried in order until one produces a non-empty file:
    verified TLS, TLS with certificate verification disabled, and finally
    urllib's default opener.

    Args:
        url: URL to audio file
        output_path: Optional custom output path (if None, a uniquely named
            ``voice_replacement_<timestamp>_*.wav`` file is created in the
            system temp directory)

    Returns:
        Path to downloaded file

    Raises:
        RuntimeError: If every download strategy fails. The last underlying
            error is attached as ``__cause__``.
    """
    if output_path:
        temp_path = output_path
    else:
        # mkstemp guarantees a unique name; a timestamp alone collides when two
        # downloads (e.g. source and target) start within the same second.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        fd, temp_path = tempfile.mkstemp(
            prefix=f"voice_replacement_{timestamp}_", suffix=".wav"
        )
        os.close(fd)

    download_methods = [
        # Method 1: fully verified TLS
        lambda: _download_with_ssl_context(
            url, temp_path, ssl.VerifyMode.CERT_REQUIRED
        ),
        # Method 2: relaxed TLS (certificate errors ignored).
        # NOTE(security): this accepts any certificate, so the payload may come
        # from an impersonated host. Kept for backward compatibility.
        lambda: _download_with_ssl_context(url, temp_path, ssl.VerifyMode.CERT_NONE),
        # Method 3: urllib's default opener
        lambda: _download_no_ssl(url, temp_path),
    ]

    last_error = None
    for download_method in download_methods:
        try:
            download_method()
            if not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0:
                raise RuntimeError(f"Downloaded file is empty or missing: {temp_path}")
            return temp_path
        except Exception as e:
            last_error = e
            # Drop any partial/empty file so neither the next attempt nor the
            # caller (after the final failure) sees stale data.
            _remove_partial_download(temp_path)

    raise RuntimeError(
        f"Failed to download audio from URL {url}. Last error: {str(last_error)}"
    ) from last_error


def _remove_partial_download(path: str) -> None:
    """Delete ``path`` if present; a missing or undeletable file is not an error here."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: the download error is what the caller needs to see.
        pass


def _fetch_to_file(
    url: str, temp_path: str, ssl_context: Optional[ssl.SSLContext] = None
) -> None:
    """Stream the response body for ``url`` into ``temp_path``."""
    import shutil

    req = urllib.request.Request(url)
    req.add_header("User-Agent", _DOWNLOAD_USER_AGENT)
    with urllib.request.urlopen(
        req, timeout=_DOWNLOAD_TIMEOUT_SECONDS, context=ssl_context
    ) as response:
        with open(temp_path, "wb") as f:
            # Copy in chunks instead of loading the whole file into memory.
            shutil.copyfileobj(response, f)


def _download_with_ssl_context(
    url: str, temp_path: str, verify_mode: ssl.VerifyMode
) -> None:
    """Download with specific SSL certificate mode."""
    ssl_context = ssl.create_default_context()
    if verify_mode == ssl.CERT_NONE:
        # check_hostname must be switched off before verification can be
        # disabled; for verifying modes the hostname check stays enabled.
        ssl_context.check_hostname = False
    ssl_context.verify_mode = verify_mode
    _fetch_to_file(url, temp_path, ssl_context)


def _download_no_ssl(url: str, temp_path: str) -> None:
    """Download without a custom SSL context (urllib's default opener and settings)."""
    _fetch_to_file(url, temp_path)
def cleanup_temp_file(file_path: str) -> None:
    """
    Clean up temporary file if it exists.

    Only files located inside the system temp directory are removed. The
    check compares path components, so a sibling such as ``/tmp_other/x`` is
    not mistaken for something inside ``/tmp``.

    Args:
        file_path: Path to temporary file

    Returns:
        None. Cleanup is best-effort: filesystem errors and unusable paths
        are ignored.
    """
    try:
        temp_root = os.path.normcase(os.path.abspath(tempfile.gettempdir()))
        candidate = os.path.normcase(os.path.abspath(file_path))
        # commonpath works on whole components, unlike a raw string prefix test.
        if os.path.commonpath([candidate, temp_root]) != temp_root:
            return
        if os.path.exists(candidate):
            os.remove(candidate)
    except (OSError, ValueError, TypeError):
        # OSError: removal failed; ValueError: paths on different drives;
        # TypeError: non-path input. None of these should break the caller.
        pass
# Sample rate handed to soundfile when the service returns raw audio samples
# instead of a file reference (value carried over unchanged).
_FALLBACK_SAMPLE_RATE = 22050

# Prefixes that mark an input as a remote resource to be downloaded first.
_REMOTE_PREFIXES = ("http://", "https://")


def _validate_conversion_params(
    diffusion_steps: int,
    length_adjust: float,
    inference_cfg_rate: float,
    pitch_shift: int,
) -> None:
    """Raise ValueError if any numeric conversion parameter is out of range."""
    if diffusion_steps < 1 or diffusion_steps > 50:
        raise ValueError("diffusion_steps must be between 1 and 50")
    if length_adjust <= 0:
        raise ValueError("length_adjust must be positive")
    if not 0 <= inference_cfg_rate <= 1:
        raise ValueError("inference_cfg_rate must be between 0 and 1")
    if pitch_shift < -12 or pitch_shift > 12:
        raise ValueError("pitch_shift must be between -12 and 12 semitones")


def _save_conversion_result(result, output_path: Path) -> None:
    """Write whatever the Seed-VC call returned to ``output_path``."""
    import shutil

    from_sequence = isinstance(result, (tuple, list))
    if from_sequence:
        if not result:
            raise RuntimeError("Seed-VC returned an empty result")
        # With multiple outputs only the second item is saved.
        item = result[1] if len(result) > 1 else result[0]
    else:
        item = result

    # File references may arrive as dicts or as objects exposing ``url``.
    if isinstance(item, dict):
        url = item.get("url")
        local_path = item.get("path")
    else:
        url = getattr(item, "url", None)
        local_path = item

    if url:
        download_audio_from_url(url, str(output_path))
        return
    if isinstance(local_path, str) and os.path.exists(local_path):
        shutil.copy2(local_path, output_path)
        return
    if from_sequence:
        raise RuntimeError(f"Unexpected result format in tuple: {item}")
    if isinstance(item, (dict, str)):
        raise RuntimeError(f"Unexpected result format: {item}")

    # Anything else is treated as raw audio data and written directly.
    import soundfile as sf

    sf.write(str(output_path), item, _FALLBACK_SAMPLE_RATE)


def _explain_failure(error_msg: str) -> str:
    """Map a raw error message to the user-facing troubleshooting text."""
    if "403" in error_msg or "Forbidden" in error_msg:
        return (
            "Seed-VC access denied. This may indicate:\n"
            "1. Files are in unsupported format\n"
            "2. Files are too large\n"
            "3. Temporary space restrictions\n"
            "4. Authentication required\n\n"
            "TROUBLESHOOTING:\n"
            "• Try different audio files (WAV, MP3, FLAC, M4A)\n"
            "• Use smaller files (< 30MB recommended)\n"
            "• Check if files are corrupted\n"
            "• Try again later if rate limited\n"
            "• Consider using a different voice source/target"
        )
    if "404" in error_msg or "Not Found" in error_msg:
        return (
            "Seed-VC cannot find one or both files. "
            "Check if:\n"
            "• Files exist and are accessible\n"
            "• File paths are correct\n"
            "• Files are in supported format (WAV, MP3, FLAC, M4A)\n"
            "• Manual download was successful"
        )
    if "timeout" in error_msg.lower():
        return (
            "Seed-VC connection timeout. "
            "Try:\n"
            "• Using fewer diffusion steps (5-10)\n"
            "• Smaller audio files\n"
            "• Processing again later\n"
            "• Checking internet connection"
        )
    return f"Voice replacement failed: {error_msg}"


def replace_voice(
    source_audio_path: str,
    target_audio_path: str,
    diffusion_steps: int = 10,
    length_adjust: float = 1.0,
    inference_cfg_rate: float = 0.7,
    f0_condition: bool = False,
    auto_f0_adjust: bool = True,
    pitch_shift: int = 0,
) -> str:
    """
    Replace voice in source audio with voice from target audio using Seed-VC.

    This function uses the Seed-VC Gradio space to perform voice conversion,
    replacing voice characteristics in source audio with those from
    target audio while preserving linguistic content and timing.

    Examples:
        >>> replace_voice("source.wav", "target.wav")
        # Returns 'path/to/source_voice_replaced_by_target_20251126_143022.wav'
        >>> replace_voice("https://example.com/source.wav", "target.wav", diffusion_steps=15)
        # Downloads source audio and replaces voice with target voice
        >>> replace_voice("source.wav", "https://example.com/voice.mp3", pitch_shift=2)
        # Downloads target voice and applies to source with pitch shift

    Args:
        source_audio_path: Path to source audio file or URL (voice to be replaced)
            Supports local files and HTTP/HTTPS URLs
        target_audio_path: Path to target audio file or URL (voice to use)
            Supports local files and HTTP/HTTPS URLs
        diffusion_steps: Number of diffusion steps for inference (default: 10)
        length_adjust: Length adjustment factor (default: 1.0)
        inference_cfg_rate: Classifier-free guidance rate (default: 0.7)
        f0_condition: Whether to use F0 conditioning (default: False)
        auto_f0_adjust: Whether to auto-adjust F0 (default: True)
        pitch_shift: Pitch shift in semitones (default: 0)

    Returns:
        Path to generated voice-replaced audio file

    Raises:
        RuntimeError: For every failure. Invalid parameters, missing or
            unreadable inputs, download errors and Seed-VC errors are all
            re-raised as RuntimeError with a descriptive message; the
            original exception is available as ``__cause__``.
    """
    source_temp_file = None
    target_temp_file = None
    try:
        # Validate cheap things first so bad parameters never trigger a download.
        _validate_conversion_params(
            diffusion_steps, length_adjust, inference_cfg_rate, pitch_shift
        )

        # Resolve inputs one at a time and register each download immediately,
        # so the source temp file is still cleaned up if the target fails.
        source_abs_path = resolve_audio_path(source_audio_path)
        if source_audio_path.startswith(_REMOTE_PREFIXES):
            source_temp_file = source_abs_path
        target_abs_path = resolve_audio_path(target_audio_path)
        if target_audio_path.startswith(_REMOTE_PREFIXES):
            target_temp_file = target_abs_path

        # Initialize Seed-VC client with manual file handling
        client = Client("frascuchon/Seed-VC", download_files=False)

        # Perform voice replacement
        result = client.predict(
            source_audio_path=handle_file(source_abs_path),
            target_audio_path=handle_file(target_abs_path),
            diffusion_steps=diffusion_steps,
            length_adjust=length_adjust,
            inference_cfg_rate=inference_cfg_rate,
            f0_condition=f0_condition,
            auto_f0_adjust=auto_f0_adjust,
            pitch_shift=pitch_shift,
            api_name="/predict_1",
        )

        # Create output directory (including missing parents)
        output_dir = Path(DEFAULT_TEMP_DIR)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Generate output filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        source_name = Path(source_abs_path).stem
        target_name = Path(target_abs_path).stem
        output_path = output_dir / (
            f"{source_name}_voice_replaced_by_{target_name}_{timestamp}.wav"
        )

        _save_conversion_result(result, output_path)
        return str(output_path)
    except Exception as e:
        # Callers rely on RuntimeError for all failures; keep the cause chained.
        raise RuntimeError(_explain_failure(str(e))) from e
    finally:
        # Always clean up temporary files
        for temp_file in (source_temp_file, target_temp_file):
            if temp_file:
                cleanup_temp_file(temp_file)
def replace_voice_wrapper(
    source_audio_path: str,
    target_audio_path: str,
    diffusion_steps: int = 10,
    length_adjust: float = 1.0,
    inference_cfg_rate: float = 0.7,
    f0_condition: bool = False,
    auto_f0_adjust: bool = True,
    pitch_shift: int = 0,
) -> str:
    """
    Exception-free front end to ``replace_voice`` for MCP integration.

    All arguments are forwarded unchanged to ``replace_voice``; any exception
    it raises is converted into an ``"Error: ..."`` string instead of
    propagating.

    Args:
        source_audio_path: Path to input audio file or URL
        target_audio_path: Path to target audio file or URL
        diffusion_steps: Number of diffusion steps (default: 10)
        length_adjust: Length adjustment factor (default: 1.0)
        inference_cfg_rate: CFG rate (default: 0.7)
        f0_condition: Use F0 conditioning (default: False)
        auto_f0_adjust: Auto-adjust F0 (default: True)
        pitch_shift: Pitch shift in semitones (default: 0)

    Returns:
        Path to generated audio file or error message

    Note for URL usage:
        Some URLs may be blocked by Seed-VC space restrictions.
        If URL processing fails with access errors, try:
        1. Download the file manually using your browser
        2. Save it locally and use the local file path
        3. Use a different audio source or target
    """
    forwarded = {
        "source_audio_path": source_audio_path,
        "target_audio_path": target_audio_path,
        "diffusion_steps": diffusion_steps,
        "length_adjust": length_adjust,
        "inference_cfg_rate": inference_cfg_rate,
        "f0_condition": f0_condition,
        "auto_f0_adjust": auto_f0_adjust,
        "pitch_shift": pitch_shift,
    }
    try:
        outcome = replace_voice(**forwarded)
    except Exception as exc:
        # Report failures as text rather than letting them propagate.
        return f"Error: {str(exc)}"
    return outcome
# Script section for running voice replacement locally.
#
# Usage:
#   python tools/voice_replacement.py source.wav target.wav
#   python tools/voice_replacement.py source.wav target.wav --steps 15 --pitch 2
#   python tools/voice_replacement.py https://example.com/source.wav target.wav
#   python tools/voice_replacement.py source.wav https://example.com/target.mp3 --pitch 2


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the voice replacement script."""
    parser = argparse.ArgumentParser(
        description="Voice replacement using Seed-VC",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python tools/voice_replacement.py source.wav target.wav
  python tools/voice_replacement.py source.wav target.wav --steps 15 --pitch 2
  python tools/voice_replacement.py source.wav target.wav --f0-condition --no-auto-f0
  python tools/voice_replacement.py https://example.com/source.wav target.wav
  python tools/voice_replacement.py source.wav https://example.com/target.mp3 --pitch 2
""",
    )
    parser.add_argument(
        "source", help="Source audio path or URL (voice to be replaced)"
    )
    parser.add_argument("target", help="Target audio path or URL (voice to use)")
    parser.add_argument(
        "--steps", type=int, default=10, help="Diffusion steps (1-50, default: 10)"
    )
    parser.add_argument(
        "--length",
        type=float,
        default=1.0,
        help="Length adjustment (0.1-3.0, default: 1.0)",
    )
    parser.add_argument(
        "--cfg",
        type=float,
        default=0.7,
        help="Inference CFG rate (0.0-1.0, default: 0.7)",
    )
    parser.add_argument(
        "--f0-condition", action="store_true", help="Enable F0 conditioning"
    )
    parser.add_argument(
        "--no-auto-f0", action="store_true", help="Disable auto F0 adjustment"
    )
    parser.add_argument(
        "--pitch",
        type=int,
        default=0,
        help="Pitch shift semitones (-12 to 12, default: 0)",
    )
    return parser


def main(argv: Optional[list] = None) -> int:
    """
    Run the command-line interface.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 on success, 1 if voice replacement failed.
        Invalid arguments and ``--help`` exit via argparse (SystemExit).
    """
    args = _build_arg_parser().parse_args(argv)

    print("Voice Replacement Tool")
    print("=" * 30)
    print(f"Source: {args.source}")
    print(f"Target: {args.target}")
    print(f"Parameters: steps={args.steps}, length={args.length}, cfg={args.cfg}")
    print(
        f"F0 condition={args.f0_condition}, auto F0={not args.no_auto_f0}, pitch={args.pitch}"
    )
    print()

    try:
        result = replace_voice(
            source_audio_path=args.source,
            target_audio_path=args.target,
            diffusion_steps=args.steps,
            length_adjust=args.length,
            inference_cfg_rate=args.cfg,
            f0_condition=args.f0_condition,
            auto_f0_adjust=not args.no_auto_f0,
            pitch_shift=args.pitch,
        )
    except Exception as e:
        print(f"❌ Error: {e}")
        return 1

    print("✅ Voice replacement completed!")
    print(f"Output saved to: {result}")
    return 0


if __name__ == "__main__":
    # Add parent directory to path for imports
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    sys.exit(main())