Audio-to-Audio
Transformers
English
music
art
voice-cloning
so-vits-svc
so-vits-svc-fork
quevedo
spanish
Instructions to use lagosproject/quevedo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use lagosproject/quevedo with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("lagosproject/quevedo", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| import os | |
| import json | |
| from pathlib import Path | |
| import gradio as gr | |
# Setup paths (resolved relative to the process working directory)
MODEL_PATH = Path("G_777.pth")
CONFIG_PATH = Path("config.json")
BANNER_PATH = Path("assets/banner.png")


def load_speakers(config_path, default=("quevedo",)):
    """Return the speaker names declared under the "spk" key of a config file.

    Args:
        config_path: Path of the JSON config file to read.
        default: Speaker names to use when the file is missing, unreadable,
            or does not declare a non-empty "spk" mapping.

    Returns:
        A non-empty list of speaker names as long as ``default`` is
        non-empty: the keys of the "spk" mapping, or a copy of ``default``.
    """
    fallback = list(default)
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = json.load(f)
    except FileNotFoundError:
        # A missing config is an expected situation; stay silent.
        return fallback
    except Exception as e:
        print(f"Error loading speakers from config: {e}")
        return fallback

    if not isinstance(config_data, dict) or "spk" not in config_data:
        return fallback

    spk = config_data["spk"]
    if not isinstance(spk, dict) or not spk:
        # An empty or malformed mapping would leave the UI without any
        # speaker to select, so keep the default instead.
        print("Error loading speakers from config: 'spk' is empty or not a mapping")
        return fallback
    return list(spk)


# Dynamic speaker loader
speakers = load_speakers(CONFIG_PATH)
| # Inference function | |
def convert_voice(input_audio, speaker, transpose, auto_predict_f0, f0_method, noise_scale):
    """Convert an audio file to the selected speaker's voice via so-vits-svc-fork.

    Args:
        input_audio: Filesystem path of the source audio, or None/empty when
            the user has not provided any audio.
        speaker: Speaker name forwarded to the inference call.
        transpose: Pitch shift; coerced to ``int`` before inference.
        auto_predict_f0: Coerced to ``bool`` before inference.
        f0_method: Name of the F0 estimation method, forwarded unchanged.
        noise_scale: Coerced to ``float`` before inference.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is the
        path of the generated WAV file as a string, or None on any failure;
        failures are reported through the message rather than raised.
    """
    # Treat every "no audio" value (None, empty string) the same way; an empty
    # string would otherwise turn into Path(".") and a bogus output name.
    if not input_audio:
        return None, "Please upload an audio file or use the microphone."

    input_path = Path(input_audio)
    output_path = input_path.parent / f"{input_path.stem}_quevedo.wav"

    # Lazy import to avoid startup errors if so-vits-svc-fork is not yet installed
    try:
        from so_vits_svc_fork.inference.main import infer
    except ImportError:
        return None, (
            "Error: 'so-vits-svc-fork' is not installed in this environment.\n"
            "Please run: pip install so-vits-svc-fork"
        )

    if not MODEL_PATH.exists():
        return None, f"Error: Model file {MODEL_PATH} not found."
    if not CONFIG_PATH.exists():
        return None, f"Error: Config file {CONFIG_PATH} not found."

    try:
        # The output name is derived from the input name, so a file left over
        # from an earlier run may already exist. Remove it first; otherwise a
        # run that writes nothing would be reported as a success with stale
        # audio. NOTE(review): infer presumably logs and skips inputs it
        # cannot load instead of raising - confirm against the library.
        output_path.unlink(missing_ok=True)

        # Perform inference using the fork's main infer function
        infer(
            input_path=input_path,
            output_path=output_path,
            model_path=MODEL_PATH,
            config_path=CONFIG_PATH,
            recursive=False,
            speaker=speaker,
            transpose=int(transpose),
            auto_predict_f0=bool(auto_predict_f0),
            noise_scale=float(noise_scale),
            f0_method=f0_method,
        )

        if output_path.exists():
            return str(output_path), "Conversion completed successfully!"
        return None, "Error: Output file was not generated."
    except Exception as e:
        # UI boundary: surface any failure as a status message, not a crash.
        return None, f"Error during inference: {str(e)}"
# Custom CSS for premium styling matching the blue-purple theme.
# Vendor-prefixed properties are paired with their standard/fallback forms so
# the gradient title and the blurred panel render across browser engines.
custom_css = """
body {
    background-color: #0b0c10;
}
.gradio-container {
    background-color: #0b0c10 !important;
    font-family: 'Outfit', 'Inter', sans-serif !important;
    max-width: 900px !important;
    margin: 0 auto !important;
    border-radius: 12px;
}
.header-area {
    text-align: center;
    padding: 20px 0;
}
.header-title {
    color: #4f46e5;
    background: linear-gradient(90deg, #818cf8 0%, #c084fc 100%);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 800 !important;
    font-size: 2.5rem !important;
    margin-bottom: 0.5rem;
}
.header-desc {
    color: #9ca3af;
    font-size: 1.1rem;
    margin-bottom: 20px;
}
.main-box {
    background: rgba(17, 24, 39, 0.7);
    border: 1px solid rgba(255, 255, 255, 0.1);
    -webkit-backdrop-filter: blur(10px);
    backdrop-filter: blur(10px);
    border-radius: 16px;
    padding: 20px;
    margin-bottom: 20px;
}
.convert-btn {
    background: linear-gradient(135deg, #6366f1 0%, #a855f7 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: bold !important;
    transition: all 0.3s ease !important;
}
.convert-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 20px rgba(139, 92, 246, 0.4);
}
"""
# Build Gradio UI
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened - confirm the layout (in particular which
# controls sit inside the accordion and in which column the button lives).
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Banner/Header
    with gr.Row():
        # Fallback to CDN URL if local banner is missing or is just a small Git LFS pointer file
        if BANNER_PATH.exists() and BANNER_PATH.stat().st_size > 5000:
            banner_source = str(BANNER_PATH)
        else:
            banner_source = "https://huggingface.co/lagosproject/quevedo/resolve/main/assets/banner.png"
        gr.Image(banner_source, show_label=False, container=False, interactive=False)

    with gr.Row(elem_classes=["header-area"]):
        gr.HTML(
            "<h1 class='header-title'>🗣️ Quevedo Voice Model (so-vits-svc-fork)</h1>"
            "<p class='header-desc'>Convert any voice or singing file into the voice of the Spanish singer Quevedo.</p>"
        )

    # Main conversion section
    with gr.Row(elem_classes=["main-box"]):
        with gr.Column(scale=1):
            gr.Markdown("### 📥 1. Audio Input")
            input_audio = gr.Audio(
                label="Audio to Convert (Clean Vocals / Acapella)",
                type="filepath",
                sources=["upload", "microphone"],
            )

            gr.Markdown("### ⚙️ 2. Conversion Parameters")
            speaker = gr.Dropdown(
                choices=speakers,
                # Guard against an empty speaker list instead of raising
                # IndexError while the UI is being built.
                value=speakers[0] if speakers else None,
                label="Speaker Name",
            )
            transpose = gr.Slider(
                minimum=-12,
                maximum=12,
                value=0,
                step=1,
                label="Pitch Shift (Semitones)",
                # Negative values lower the pitch, so the female-to-male hint
                # must say "decrease"; the previous wording was inverted.
                info="Decrease for female-to-male voices (e.g. -5 to -12), or increase for male-to-female.",
            )

            with gr.Accordion("Advanced Options", open=False):
                auto_predict_f0 = gr.Checkbox(
                    value=False,
                    label="Auto Predict F0",
                    info="Recommended for speech/narration. UNCHECK for singing to preserve notes.",
                )
                f0_method = gr.Dropdown(
                    choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
                    value="crepe",
                    label="F0 Predictor Algorithm",
                    info="crepe offers the best quality but is slower; dio is the fastest.",
                )
                noise_scale = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.4,
                    step=0.05,
                    label="Noise Scale",
                    info="Controls pitch variance and expressiveness (0.4 is standard).",
                )

        with gr.Column(scale=1):
            gr.Markdown("### 📤 3. Output Audio")
            output_audio = gr.Audio(
                label="Converted Audio",
                type="filepath",
            )
            status_output = gr.Textbox(
                label="Status",
                value="Ready",
                interactive=False,
            )
            submit_btn = gr.Button(
                "Convert Voice 🚀",
                variant="primary",
                elem_classes=["convert-btn"],
            )

    # The input order must match convert_voice's positional parameters; its
    # two return values fill the audio player and the status box.
    submit_btn.click(
        fn=convert_voice,
        inputs=[input_audio, speaker, transpose, auto_predict_f0, f0_method, noise_scale],
        outputs=[output_audio, status_output],
    )

    # Footer
    gr.HTML(
        "<div style='text-align: center; color: #4b5563; font-size: 0.85rem; padding: 20px 0;'>"
        "This model is for artistic demonstration and research purposes only. "
        "Uses so-vits-svc-fork for inference.<br>"
        "Developed with 💜 for the open voice community.</div>"
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)