RVC-CH

Running

App Files Files Community

LosCaquitos committed 13 days ago

Commit

0d49186

verified ·

1 Parent(s): 4415153

Update app.py

Browse files

Files changed (1) hide show

app.py +569 -594

app.py CHANGED Viewed

@@ -1,651 +1,626 @@
 """
-RVC Voice Conversion – Simplified & Working Version
-Suporta: MP3, WAV, FLAC, OGG, M4A, MP4, MKV, WebM, AVI, MOV, FLV
-Gradio 6.0 Compatible
-Com aba Jobs para mostrar status
 """
 from __future__ import annotations
 import os
 import subprocess
 import tempfile
-import shutil
 from pathlib import Path
-import json
-import uuid
-from datetime import datetime
-import traceback
-import threading
 import gradio as gr
-import numpy as np
-import librosa
-import soundfile as sf
-from scipy import signal
-import zipfile
-DEVICE = "cpu"
-DEVICE_LABEL = "CPU (Stable)"
-MODELS_DIR = Path("models")
-OUTPUTS_DIR = Path("outputs")
-JOBS_DIR = Path("conversion_jobs")
-MODELS_DIR.mkdir(exist_ok=True)
-OUTPUTS_DIR.mkdir(exist_ok=True)
-JOBS_DIR.mkdir(exist_ok=True)
-CONFIG = {
-    "sample_rate": 16000,
-    "n_fft": 800,
-    "hop_length": 200,
-}
-AUDIO_FORMATS = ["wav", "mp3", "flac", "ogg", "m4a", "aac", "wma", "mp4", "mkv", "webm", "avi", "mov", "flv"]
-VIDEO_FORMATS = ["mp4", "mkv", "webm", "avi", "mov", "flv", "m4v"]
-# ============================================================================
-# JOB MANAGER PARA CONVERSÃO
-# ============================================================================
class ConversionJobManager:
    """Keep conversion jobs in memory and mirror each one to ``JOBS_DIR/<id>.json``."""

    def __init__(self):
        self.jobs = {}
        self.load_jobs()

    def load_jobs(self):
        """Load every ``*.json`` file found in ``JOBS_DIR`` into ``self.jobs``.

        Loading is best-effort: a file that cannot be read, is not valid JSON,
        or does not hold a mapping with an ``"id"`` key is skipped.
        """
        for job_file in JOBS_DIR.glob("*.json"):
            try:
                with open(job_file, encoding="utf-8") as f:
                    job = json.load(f)
                self.jobs[job["id"]] = job
            except (OSError, ValueError, KeyError, TypeError):
                # Only data/IO problems are ignored; KeyboardInterrupt and
                # SystemExit must still propagate (a bare except hid them).
                continue

    def create_job(self, job_type, filename):
        """Register a new job in the "waiting" state and return its 8-char id.

        ``job_type`` is stored as given ("audio" or "video" in this file).
        """
        job_id = str(uuid.uuid4())[:8]
        # One timestamp so created_at and updated_at start out identical.
        now = datetime.now().isoformat()
        job = {
            "id": job_id,
            "type": job_type,
            "filename": filename,
            "status": "waiting",  # waiting, converting, done, error
            "progress": 0,
            "created_at": now,
            "updated_at": now,
            "result_files": {
                "entrada": None,
                "entrada_acapella": None,
                "entrada_instrumental": None,
                "saida_acapella": None,
                "saida": None,
                "video_output": None,
                "zip": None,
            },
            "error": None,
        }
        self.jobs[job_id] = job
        self.save_job(job_id)
        return job_id

    def save_job(self, job_id):
        """Write the job's current state to ``JOBS_DIR/<job_id>.json``."""
        job_file = JOBS_DIR / f"{job_id}.json"
        with open(job_file, "w", encoding="utf-8") as f:
            json.dump(self.jobs[job_id], f, indent=2)

    def update_job(self, job_id, **kwargs):
        """Merge ``kwargs`` into a known job, refresh ``updated_at`` and persist it.

        Unknown job ids are ignored.
        """
        if job_id in self.jobs:
            self.jobs[job_id].update(kwargs)
            self.jobs[job_id]["updated_at"] = datetime.now().isoformat()
            self.save_job(job_id)

    def get_job(self, job_id):
        """Return the job dict for ``job_id``, or ``None`` when it is unknown."""
        return self.jobs.get(job_id)

    def list_jobs(self):
        """Return all jobs, most recently updated first."""
        return sorted(
            self.jobs.values(),
            key=lambda job: job.get("updated_at", ""),
            reverse=True,
        )
-job_manager = ConversionJobManager()
-# ============================================================================
-# FUNÇÕES
-# ============================================================================
def load_audio(path: str) -> tuple:
    """Load ``path`` through librosa as mono audio resampled to 16 kHz.

    Returns ``(samples, sample_rate)``; on any failure the error is printed
    and ``(None, None)`` is returned instead.
    """
    try:
        samples, rate = librosa.load(path, sr=16000, mono=True)
    except Exception as err:
        print(f"Erro ao carregar áudio: {err}")
        return None, None
    return samples, rate
def separate_vocals(audio: np.ndarray, sr: int) -> tuple:
    """Split ``audio`` into a rough "vocal" band and a residual.

    The vocal part is the signal after a 5th-order 300 Hz Butterworth
    high-pass; the residual is ``audio`` minus half of that band.
    If filtering fails, the error is printed and ``(audio, zeros)`` is returned.
    """
    try:
        highpass = signal.butter(5, 300, 'hp', fs=sr, output='sos')
        vocal_band = signal.sosfilt(highpass, audio)
        residual = audio - vocal_band * 0.5
    except Exception as err:
        print(f"Erro ao separar vocais: {err}")
        return audio, np.zeros_like(audio)
    else:
        return vocal_band, residual
def apply_pitch_shift(audio: np.ndarray, sr: int, n_steps: int) -> np.ndarray:
    """Shift ``audio`` by ``n_steps`` semitones using librosa.

    A shift of 0 returns the input object untouched. If the shift fails,
    the error is printed and the input is returned unchanged.
    """
    try:
        shifted = (
            audio
            if n_steps == 0
            else librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
        )
    except Exception as err:
        print(f"Erro no pitch shift: {err}")
        return audio
    return shifted
def apply_reverb_simple(audio: np.ndarray, sr: int, wet: float = 0.15) -> np.ndarray:
    """Mix ``audio`` with a single copy of itself delayed by 50 ms.

    Args:
        audio: 1-D sample array.
        sr: Sample rate in Hz; the delay is ``int(sr * 0.05)`` samples.
        wet: Weight of the delayed copy; the dry signal gets ``1 - wet``.

    Returns:
        An array with the same length as ``audio``. When the clip is shorter
        than the delay the echo falls entirely outside it, so only the dry
        part (``audio * (1 - wet)``) remains. On error the message is printed
        and ``audio`` is returned unchanged.
    """
    try:
        delay_samples = max(int(sr * 0.05), 0)
        total = len(audio)
        # Build the delayed copy at the final length up front. The previous
        # concatenate/negative-slice form produced a wrong-sized array when
        # the delay was 0 or exceeded the clip length.
        delayed = np.zeros(total)
        if delay_samples < total:
            delayed[delay_samples:] = audio[:total - delay_samples]
        return audio * (1 - wet) + delayed * wet
    except Exception as e:
        print(f"Erro no reverb: {e}")
        return audio
def apply_noise_reduction(audio: np.ndarray, strength: float = 0.5) -> np.ndarray:
    """Halve every sample whose magnitude is below a percentile threshold.

    Args:
        audio: Sample array; it is not modified.
        strength: Fraction in ``[0, 1]`` selecting the percentile of
            ``|audio|`` used as the threshold. Values outside that range are
            clamped, so they no longer abort the step through the error path.

    Returns:
        A processed copy of ``audio``. Empty input is returned as is; on
        error the message is printed and ``audio`` is returned unchanged.
    """
    if np.size(audio) == 0:
        return audio
    try:
        percentile = min(max(float(strength), 0.0), 1.0) * 100
        threshold = np.percentile(np.abs(audio), percentile)
        audio_copy = audio.copy()
        audio_copy[np.abs(audio_copy) < threshold] *= 0.5
        return audio_copy
    except Exception as e:
        print(f"Erro na redução de ruído: {e}")
        return audio
def normalize_audio(audio: np.ndarray) -> np.ndarray:
    """Scale ``audio`` so that its peak magnitude becomes 0.95.

    Empty or all-zero input is returned unchanged (``np.max`` would raise on
    an empty array, and a zero peak cannot be scaled).
    """
    if np.size(audio) == 0:
        return audio
    peak = np.max(np.abs(audio))
    if peak > 0:
        return audio / peak * 0.95
    return audio
 def extract_audio_from_video(video_path: str) -> str:
-    """Extrai áudio de vídeo"""
     try:
-        output_audio = Path(tempfile.gettempdir()) / f"temp_audio_{uuid.uuid4().hex[:8]}.wav"
-        cmd = [
-            "ffmpeg", "-i", video_path,
-            "-q:a", "0", "-map", "a",
-            "-y", str(output_audio)
-        ]
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
-        if result.returncode != 0:
-            print(f"Erro FFmpeg: {result.stderr}")
-            return None
-        return str(output_audio)
     except Exception as e:
-        print(f"Erro ao extrair áudio de vídeo: {e}")
-        return None
-def convert_to_wav_16k(audio_path: str) -> str:
-    """Converte para WAV 16kHz"""
-    try:
-        y, sr = librosa.load(audio_path, sr=16000, mono=True)
-        output_path = Path(audio_path).with_suffix(".wav")
-        sf.write(output_path, y, 16000)
-        return str(output_path)
-    except Exception as e:
-        print(f"Erro ao converter: {e}")
-        return None
def convert_voice_simple(
    audio_path: str,
    pitch: int = 0,
    clean: bool = False,
    reverb: bool = False,
    job_id: str = None,
) -> dict:
    """Run the simplified voice pipeline and write five WAV files.

    Steps: load the audio, split it into a vocal band and a residual, apply
    the optional pitch shift / noise reduction / reverb to the vocals,
    normalise them, and mix them back with the residual. Progress is reported
    through ``job_manager.update_job`` (a no-op for unregistered ids).

    Args:
        audio_path: Input audio file.
        pitch: Pitch shift in semitones; 0 disables it.
        clean: Apply noise reduction to the vocals.
        reverb: Apply the simple reverb to the vocals.
        job_id: Job to report progress to; a fresh 8-char id is generated
            when omitted. It also names the folder under ``OUTPUTS_DIR``.

    Returns:
        On success, a dict with ``status="success"``, the five file paths
        and ``output_dir``. On failure, ``{"status": "error", "error": msg}``.
    """
    if job_id is None:
        job_id = str(uuid.uuid4())[:8]
    output_dir = OUTPUTS_DIR / job_id
    try:
        # Created inside the try so a filesystem error is reported through
        # the error dict like every other failure.
        output_dir.mkdir(parents=True, exist_ok=True)
        job_manager.update_job(job_id, status="converting", progress=10)

        # 1. Load the audio and keep a copy of the original input.
        audio, sr = load_audio(audio_path)
        if audio is None:
            raise ValueError("Erro ao carregar áudio")
        entrada_path = output_dir / "entrada.wav"
        sf.write(entrada_path, audio, sr)
        job_manager.update_job(job_id, progress=20)

        # 2. Split into vocals and instrumental and save both.
        vocals, instrumental = separate_vocals(audio, sr)
        entrada_acapella_path = output_dir / "entrada_acapella.wav"
        sf.write(entrada_acapella_path, vocals, sr)
        entrada_instrumental_path = output_dir / "entrada_instrumental.wav"
        sf.write(entrada_instrumental_path, instrumental, sr)
        job_manager.update_job(job_id, progress=40)

        # 3. Process the vocals.
        vocals_converted = vocals.copy()
        if pitch != 0:
            vocals_converted = apply_pitch_shift(vocals_converted, sr, pitch)
        if clean:
            vocals_converted = apply_noise_reduction(vocals_converted, strength=0.5)
        if reverb:
            vocals_converted = apply_reverb_simple(vocals_converted, sr, wet=0.15)
        vocals_converted = normalize_audio(vocals_converted)
        saida_acapella_path = output_dir / "saida_acapella.wav"
        sf.write(saida_acapella_path, vocals_converted, sr)
        job_manager.update_job(job_id, progress=70)

        # 4. Mix the processed vocals with the original instrumental,
        #    trimmed to the shorter of the two.
        min_len = min(len(vocals_converted), len(instrumental))
        mix = normalize_audio(vocals_converted[:min_len] + instrumental[:min_len])
        saida_path = output_dir / "saida.wav"
        sf.write(saida_path, mix, sr)
        job_manager.update_job(job_id, progress=85)

        return {
            "status": "success",
            "entrada": str(entrada_path),
            "entrada_acapella": str(entrada_acapella_path),
            "entrada_instrumental": str(entrada_instrumental_path),
            "saida_acapella": str(saida_acapella_path),
            "saida": str(saida_path),
            "output_dir": str(output_dir),
        }
    except Exception as e:
        print(f"Erro na conversão: {e}")
        traceback.print_exc()
        job_manager.update_job(job_id, status="error", error=str(e))
        return {
            "status": "error",
            "error": str(e),
        }
def process_video_simple(
    video_path: str,
    pitch: int = 0,
    clean: bool = False,
    reverb: bool = False,
    job_id: str = None,
) -> dict:
    """Convert the audio track of a video and remux it into a new MP4.

    Extracts the audio with ffmpeg, runs ``convert_voice_simple`` on it, then
    copies the original video stream together with the converted mix
    (AAC-encoded) into ``saida_video.mp4``.

    Args:
        video_path: Input video file.
        pitch, clean, reverb: Forwarded to ``convert_voice_simple``.
        job_id: Job to report progress to; generated when omitted.

    Returns:
        The ``convert_voice_simple`` result plus ``video_output`` on success,
        otherwise a dict with ``status="error"`` and the error message.
    """
    if job_id is None:
        job_id = str(uuid.uuid4())[:8]
    output_dir = OUTPUTS_DIR / job_id
    temp_audio = output_dir / "temp_audio.wav"
    try:
        output_dir.mkdir(parents=True, exist_ok=True)

        # 1. Extract the audio track from the video.
        job_manager.update_job(job_id, status="converting", progress=5)
        cmd = [
            "ffmpeg", "-i", video_path, "-q:a", "0", "-map", "a",
            "-y", str(temp_audio)
        ]
        subprocess.run(cmd, check=True, capture_output=True, timeout=600)
        job_manager.update_job(job_id, progress=15)

        # 2. Convert the extracted audio.
        result = convert_voice_simple(str(temp_audio), pitch=pitch, clean=clean, reverb=reverb, job_id=job_id)
        if result["status"] != "success":
            return result
        # No progress update here: convert_voice_simple already reported 85,
        # and the old value of 75 made the progress display move backwards.

        # 3. Remux the original video stream with the new audio.
        output_video = output_dir / "saida_video.mp4"
        cmd = [
            "ffmpeg", "-i", video_path, "-i", result["saida"],
            "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0",
            "-y", str(output_video)
        ]
        subprocess.run(cmd, check=True, capture_output=True, timeout=600)
        job_manager.update_job(job_id, progress=90)
        return {
            **result,
            "video_output": str(output_video),
        }
    except Exception as e:
        print(f"Erro no processamento de vídeo: {e}")
        traceback.print_exc()
        job_manager.update_job(job_id, status="error", error=str(e))
        return {
            "status": "error",
            "error": str(e),
        }
    finally:
        # Remove the extracted track on every path, not only on success.
        temp_audio.unlink(missing_ok=True)
-def create_zip(output_dir: Path) -> str:
-    """Cria ZIP com todos os arquivos"""
-    try:
-        zip_path = output_dir.parent / f"{output_dir.name}.zip"
-        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
-            for file in sorted(output_dir.glob("*.wav")) + sorted(output_dir.glob("*.mp4")):
-                zf.write(file, arcname=file.name)
-        return str(zip_path)
-    except Exception as e:
-        print(f"Erro ao criar ZIP: {e}")
-        return None
def submit_audio_async(audio_mic, audio_file, pitch, clean, reverb):
    """Queue an audio conversion on a background thread.

    The microphone recording wins over the uploaded file when both are given.
    Returns a status message for the UI: an error text when no audio was
    supplied, otherwise a confirmation containing the new job id.
    """
    audio_path = audio_mic or audio_file
    if not audio_path:
        return "❌ Nenhum áudio fornecido"

    job_id = job_manager.create_job("audio", Path(audio_path).name)

    def _worker():
        # Runs the conversion, zips the outputs and records the final state.
        try:
            outcome = convert_voice_simple(audio_path, pitch=pitch, clean=clean, reverb=reverb, job_id=job_id)
            if outcome["status"] != "success":
                job_manager.update_job(job_id, status="error", error=outcome.get("error", "Erro desconhecido"))
                return
            archive = create_zip(Path(outcome["output_dir"]))
            produced = {
                key: outcome[key]
                for key in (
                    "entrada",
                    "entrada_acapella",
                    "entrada_instrumental",
                    "saida_acapella",
                    "saida",
                )
            }
            produced["zip"] = archive
            job_manager.update_job(job_id, status="done", progress=100, result_files=produced)
        except Exception as err:
            job_manager.update_job(job_id, status="error", error=str(err))

    threading.Thread(target=_worker, daemon=False).start()
    return f"✅ Conversão iniciada! Job ID: {job_id}\n\nVá para a aba 'Jobs' para acompanhar."
def submit_video_async(video_file, pitch, clean, reverb):
    """Queue a video conversion on a background thread.

    Returns a status message for the UI: an error text when no video was
    supplied, otherwise a confirmation containing the new job id.
    """
    if not video_file:
        return "❌ Nenhum vídeo fornecido"

    job_id = job_manager.create_job("video", Path(video_file).name)

    def _worker():
        # Runs the video pipeline, zips the outputs and records the final state.
        try:
            outcome = process_video_simple(video_file, pitch=pitch, clean=clean, reverb=reverb, job_id=job_id)
            if outcome["status"] != "success":
                job_manager.update_job(job_id, status="error", error=outcome.get("error", "Erro desconhecido"))
                return
            archive = create_zip(Path(outcome["output_dir"]))
            produced = {
                key: outcome[key]
                for key in (
                    "entrada",
                    "entrada_acapella",
                    "entrada_instrumental",
                    "saida_acapella",
                    "saida",
                )
            }
            produced["video_output"] = outcome.get("video_output")
            produced["zip"] = archive
            job_manager.update_job(job_id, status="done", progress=100, result_files=produced)
        except Exception as err:
            job_manager.update_job(job_id, status="error", error=str(err))

    threading.Thread(target=_worker, daemon=False).start()
    return f"✅ Conversão de vídeo iniciada! Job ID: {job_id}\n\nVá para a aba 'Jobs' para acompanhar."
def refresh_jobs():
    """Reload the jobs from disk and render them as a Markdown status list.

    Returns a placeholder sentence when there are no jobs. Otherwise each job
    gets a block with its status, type, progress and creation time, followed
    by a download hint when done or the error text when it failed.
    """
    job_manager.load_jobs()
    jobs = job_manager.list_jobs()
    if not jobs:
        return "Nenhuma conversão realizada ainda."

    # Built once instead of on every loop iteration.
    status_labels = {
        "waiting": "⏳ Esperando",
        "converting": "🔄 Convertendo",
        "done": "✅ Concluído",
        "error": "❌ Erro",
    }
    # Collect the pieces and join once instead of repeated string +=.
    parts = []
    for job in jobs:
        status_icon = status_labels.get(job["status"], "❓")
        parts.append(f"**{status_icon}** - {job['filename']} (ID: `{job['id']}`)\n")
        parts.append(f"- Tipo: {job['type'].upper()}\n")
        parts.append(f"- Progresso: {job['progress']}%\n")
        parts.append(f"- Criado: {job['created_at']}\n")
        if job["status"] == "done":
            parts.append("- 📦 ZIP: Disponível para download\n")
        elif job["status"] == "error":
            parts.append(f"- Erro: {job['error']}\n")
        parts.append("\n")
    return "".join(parts)
-# ============================================================================
-# GRADIO UI (GRADIO 6.0 COMPATIBLE)
-# ============================================================================
# Gradio UI: three tabs (audio conversion, video conversion, job status).
with gr.Blocks(title="RVC Voice Conversion") as demo:
    gr.HTML("""
    <div style="text-align: center; padding: 20px;">
        <h1>🎙️ RVC Voice Conversion</h1>
        <p>Conversão de voz simplificada · 5 áudios de saída · Processamento de vídeo</p>
        <p style="color: #666;">CPU (Estável)</p>
    </div>
    """)
    with gr.Tabs():
        # ── TAB 1: AUDIO ──────────────────────────────────────────
        with gr.Tab("🎤 Converter Áudio"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 🔊 Entrada")
                    with gr.Tabs():
                        with gr.Tab("🎙️ Microfone"):
                            audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Gravar")
                        with gr.Tab("📁 Upload"):
                            audio_file = gr.Audio(sources=["upload"], type="filepath", label="Upload de áudio")
                    gr.Markdown("### ⚙️ Configurações")
                    pitch = gr.Slider(-24, 24, value=0, step=1, label="Pitch Shift (semitons)")
                    clean = gr.Checkbox(value=False, label="Redução de Ruído")
                    reverb = gr.Checkbox(value=False, label="Reverb")
                    convert_btn = gr.Button("🚀 Converter Áudio", variant="primary", size="lg")
                    status_output = gr.Textbox(label="Status", interactive=False)
        # ── TAB 2: VIDEO ──────────────────────────────────────────
        with gr.Tab("🎬 Converter Vídeo"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 🎥 Entrada")
                    gr.Markdown("Suporta: MP4, MKV, WebM, AVI, MOV, FLV")
                    video_file = gr.Video(label="Upload de vídeo", sources=["upload"])
                    gr.Markdown("### ⚙️ Configurações")
                    video_pitch = gr.Slider(-24, 24, value=0, step=1, label="Pitch Shift (semitons)")
                    video_clean = gr.Checkbox(value=False, label="Redução de Ruído")
                    video_reverb = gr.Checkbox(value=False, label="Reverb")
                    video_btn = gr.Button("🎬 Converter Vídeo", variant="primary", size="lg")
                    video_status_output = gr.Textbox(label="Status", interactive=False)
        # ── TAB 3: JOBS ──────────────────────────────────────────
        with gr.Tab("📋 JOBS"):
            gr.Markdown("### 📊 Status de Conversões")
            gr.Markdown("Clique em **Refresh** ou espere auto-atualizar")
            refresh_btn = gr.Button("🔄 Refresh", size="lg", variant="primary")
            jobs_output = gr.Markdown()
            refresh_btn.click(refresh_jobs, outputs=jobs_output)
            # Fill the list once when the page loads.
            demo.load(refresh_jobs, outputs=jobs_output)
            # Auto-refresh every 3 seconds. A Timer replaces the second
            # demo.load(..., every=3) registration, which duplicated the load
            # handler and relied on the `every` event-listener argument.
            jobs_timer = gr.Timer(3)
            jobs_timer.tick(refresh_jobs, outputs=jobs_output)
            gr.Markdown("### ⬇️ Clique na setinha para expandir e ver os 5 áudios")
            with gr.Accordion("📦 Resultados (Clique para expandir ⬇️)"):
                job_id_input = gr.Textbox(label="🔑 Cole o Job ID aqui para ver os resultados", placeholder="Ex: a1b2c3d4")
                show_results_btn = gr.Button("Mostrar Resultados", variant="primary")
                with gr.Group():
                    gr.Markdown("### 🎵 Áudios de Entrada")
                    entrada_aud = gr.Audio(label="entrada.wav", interactive=False)
                    entrada_acap = gr.Audio(label="entrada_acapella.wav", interactive=False)
                    entrada_inst = gr.Audio(label="entrada_instrumental.wav", interactive=False)
                with gr.Group():
                    gr.Markdown("### 🎵 Áudios de Saída")
                    saida_acap = gr.Audio(label="saida_acapella.wav", interactive=False)
                    saida_aud = gr.Audio(label="saida.wav", interactive=False)
                with gr.Group():
                    gr.Markdown("### 🎬 Vídeo de Saída")
                    video_output = gr.Video(label="saida_video.mp4", interactive=False)
                with gr.Group():
                    gr.Markdown("### 📥 Download")
                    download_file = gr.File(label="Baixar ZIP com todos os arquivos")

                def show_job_results(job_id_str):
                    """Return the seven output values for a finished job.

                    Yields ``None`` for every output when the id is blank,
                    unknown, or the job has not reached the "done" state.
                    """
                    if not job_id_str or not job_id_str.strip():
                        return None, None, None, None, None, None, None
                    job = job_manager.get_job(job_id_str.strip())
                    if not job or job["status"] != "done":
                        return None, None, None, None, None, None, None
                    files = job["result_files"]
                    return (
                        files["entrada"],
                        files["entrada_acapella"],
                        files["entrada_instrumental"],
                        files["saida_acapella"],
                        files["saida"],
                        files.get("video_output"),
                        files["zip"],
                    )

                show_results_btn.click(
                    show_job_results,
                    inputs=job_id_input,
                    outputs=[entrada_aud, entrada_acap, entrada_inst, saida_acap, saida_aud, video_output, download_file]
                )
    # Wire the conversion buttons to their handlers.
    convert_btn.click(
        submit_audio_async,
        inputs=[audio_mic, audio_file, pitch, clean, reverb],
        outputs=status_output,
    )
    video_btn.click(
        submit_video_async,
        inputs=[video_file, video_pitch, video_clean, video_reverb],
        outputs=video_status_output,
    )
 if __name__ == "__main__":
     demo.launch(
-        share=True,
         server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
     )

 """
+RVC Voice Conversion – HuggingFace Space
+Simple, fast, GPU/CPU auto-detected. Now with video upload and 5-output generation!
 """
 from __future__ import annotations
 import os
 import subprocess
 import tempfile
 from pathlib import Path
 import gradio as gr
+import torch
+from moviepy.video.io.VideoFileClip import VideoFileClip
+# Imports do seu código original
+from lib.config import (
+    BUILTIN_MODELS,
+    CSS,
+    DEVICE_LABEL,
+    MAX_INPUT_DURATION,
+    logger,
+)
+from lib.jobs import (
+    get_jobs_table,
+    get_queue_info,
+    poll_job,
+    submit_job,
+)
+from lib.models import list_models, startup_downloads
+from lib.ui import refresh_models, toggle_autotune, upload_model
+# ── Startup (original) ───────────────────────────────────────────────────────
+startup_status = ""
+default_model = ""
+try:
+    default_model = startup_downloads()
+    startup_status = f"✅ Ready &nbsp;·&nbsp; {DEVICE_LABEL}"
+except Exception as e:
+    startup_status = f"⚠️ Some assets unavailable: {e} &nbsp;·&nbsp; {DEVICE_LABEL}"
+    logger.warning("Startup download issue: %s", e)
+initial_models = list_models()
+initial_value = default_model if default_model in initial_models else (
+    initial_models[0] if initial_models else None
+)
+# ── NOVAS FUNÇÕES DE PROCESSAMENTO ──────────────────────────────────────────
 def extract_audio_from_video(video_path: str) -> str:
     """Extract the audio track of a video file into an MP3 using moviepy.

     The MP3 is written next to the video: a trailing ``.mp4`` is swapped for
     ``.mp3``, and any other name gets ``.mp3`` appended. An MP3 that already
     exists at that path is reused.

     Returns:
         The path of the MP3 file.

     Raises:
         gr.Error: If the video has no audio track or extraction fails.
     """
     # Swap only the trailing extension. str.replace() would also rewrite
     # every other ".mp4" occurring earlier in the path.
     if video_path.endswith('.mp4'):
         audio_path = video_path[:-len('.mp4')] + '.mp3'
     else:
         audio_path = video_path + '.mp3'
     if os.path.exists(audio_path):
         return audio_path
     # Write under a temporary name and rename on success, so an interrupted
     # write is never picked up by the existence check above.
     partial_path = audio_path + '.part.mp3'
     try:
         with VideoFileClip(video_path) as video:
             audio = video.audio
             if audio is None:
                 raise ValueError("O arquivo de vídeo não possui nenhuma faixa de áudio.")
             audio.write_audiofile(partial_path, logger=None)
         os.replace(partial_path, audio_path)
         return audio_path
     except Exception as e:
         if os.path.exists(partial_path):
             os.remove(partial_path)
         logger.error(f"Erro ao extrair áudio do vídeo: {e}")
         raise gr.Error(f"Falha ao processar o vídeo: {e}") from e
def separate_audio_stems(audio_path: str, output_dir: str) -> tuple[str, str]:
    """Split ``audio_path`` into vocal and instrumental MP3s with Demucs.

    Runs ``demucs.separate`` (model ``htdemucs``, ``--two-stems=vocals``) on
    CUDA when available and on CPU otherwise, converts the two resulting WAV
    stems to 192 kbit/s MP3 with ffmpeg, and deletes the WAV files.

    Args:
        audio_path: Input audio file.
        output_dir: Directory receiving ``entrada_acapella.mp3`` and
            ``entrada_instrumental.mp3``. If both already exist they are
            returned without running Demucs again.

    Returns:
        ``(acapella_path, instrumental_path)``.

    Raises:
        gr.Error: If Demucs or ffmpeg exits with a non-zero status.
        FileNotFoundError: If an expected stem is missing after separation.
    """
    # Local import: run Demucs with the interpreter executing this app, since
    # a bare "python3" on PATH may be an environment without Demucs.
    import sys

    try:
        acapella_path = os.path.join(output_dir, "entrada_acapella.mp3")
        instrumental_path = os.path.join(output_dir, "entrada_instrumental.mp3")
        # Reuse earlier results when both stems are already present.
        if os.path.exists(acapella_path) and os.path.exists(instrumental_path):
            return acapella_path, instrumental_path

        cmd = [
            sys.executable, "-m", "demucs.separate",
            "--two-stems=vocals",
            "-n", "htdemucs",
            "-d", "cuda" if torch.cuda.is_available() else "cpu",
            "-o", output_dir,
            audio_path
        ]
        logger.info(f"Executando separação Demucs: {' '.join(cmd)}")
        subprocess.run(cmd, capture_output=True, text=True, check=True)

        # Demucs is expected to write <output_dir>/htdemucs/<track name>/;
        # fall back to searching for vocals.wav if the layout differs.
        demucs_output = Path(output_dir) / "htdemucs" / Path(audio_path).stem
        if not demucs_output.exists():
            found = next(Path(output_dir).rglob("vocals.wav"), None)
            if found is not None:
                demucs_output = found.parent

        vocals_wav = demucs_output / "vocals.wav"
        no_vocals_wav = demucs_output / "no_vocals.wav"
        # Check both stems before converting so a missing one fails early.
        if not vocals_wav.exists():
            raise FileNotFoundError(f"Arquivo vocals.wav não encontrado em {demucs_output}")
        if not no_vocals_wav.exists():
            raise FileNotFoundError(f"Arquivo no_vocals.wav não encontrado em {demucs_output}")

        for wav_file, mp3_path in ((vocals_wav, acapella_path), (no_vocals_wav, instrumental_path)):
            # text=True keeps ffmpeg's stderr readable when it is logged below.
            subprocess.run([
                "ffmpeg", "-y", "-i", str(wav_file), "-acodec", "libmp3lame", "-b:a", "192k", mp3_path
            ], check=True, capture_output=True, text=True)

        # Remove the intermediate WAV files to save space, then the stem
        # folder itself if nothing else is left in it.
        os.remove(vocals_wav)
        os.remove(no_vocals_wav)
        if demucs_output.exists() and not any(demucs_output.iterdir()):
            demucs_output.rmdir()
        return acapella_path, instrumental_path
    except subprocess.CalledProcessError as e:
        logger.error(f"Erro na separação do Demucs: {e.stderr}")
        raise gr.Error("Falha na separação das faixas. Verifique se o Demucs está instalado corretamente.") from e
    except Exception as e:
        logger.error(f"Erro inesperado na separação: {e}")
        raise
+def process_full_pipeline(
+    video_file: str | None,
+    audio_mic: str | None,
+    audio_file: str | None,
+    model: str,
+    pitch: int,
+    f0_method: str,
+    index_rate: float,
+    protect: float,
+    vol_env: float,
+    clean_cb: bool,
+    clean_strength: float,
+    split_cb: bool,
+    autotune_cb: bool,
+    autotune_strength: float,
+    filter_radius: int,
+    fmt: str,
+    reverb_cb: bool,
+    reverb_room: float,
+    reverb_damp: float,
+    reverb_wet: float,
+) -> tuple[str, str, str, str, str, str, str]:
+    """
+    Função principal que orquestra todo o novo pipeline:
+    1. Obtém o áudio de entrada (vídeo, microfone ou upload)
+    2. Extrai áudio se for vídeo
+    3. Aplica Demucs para separar acapella e instrumental
+    4. Converte o áudio original e o acapella usando RVC
+    5. Retorna os 5 arquivos de áudio mais o status
+    """
+    # Cria um diretório temporário único para esta execução
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # --- PASSO 1: Determinar o arquivo de áudio fonte ---
+        input_audio_path = None
+        if video_file:
+            # É um vídeo, extrai o áudio
+            input_audio_path = extract_audio_from_video(video_file)
+            logger.info(f"Áudio extraído do vídeo: {input_audio_path}")
+        elif audio_mic:
+            input_audio_path = audio_mic
+            logger.info(f"Usando áudio do microfone: {input_audio_path}")
+        elif audio_file:
+            input_audio_path = audio_file
+            logger.info(f"Usando áudio enviado: {input_audio_path}")
+        else:
+            return "Erro: Nenhuma fonte de áudio ou vídeo foi fornecida.", "", "", "", "", "", ""
+        # Converte o áudio de entrada para um formato padrão (MP3) para facilitar
+        base_audio = os.path.join(tmp_dir, "entrada_original.mp3")
+        subprocess.run([
+            "ffmpeg", "-y", "-i", input_audio_path, "-acodec", "libmp3lame", "-b:a", "192k", base_audio
+        ], check=True, capture_output=True)
+        # --- PASSO 2: Separar acapella e instrumental com Demucs ---
+        logger.info("Iniciando separação Demucs...")
+        entrada_acapella, entrada_instrumental = separate_audio_stems(base_audio, tmp_dir)
+        logger.info(f"Separação concluída. Acapella: {entrada_acapella}, Instrumental: {entrada_instrumental}")
+        # --- PASSO 3: Aplicar RVC no áudio original e no acapella ---
+        # Precisamos de uma função de callback para o submit_job, pois ele espera um arquivo e retorna um job_id
+        # Como o submit_job é assíncrono e usa uma fila, vamos usá-lo diretamente.
+        # Para simplificar, vamos chamar submit_job duas vezes e esperar os resultados.
+        # Nota: submit_job retorna uma mensagem de status e um job_id. Precisamos de uma função que espere o job terminar.
+        # Vou criar uma função auxiliar para aguardar a conclusão.
+        def run_rvc_conversion(audio_path, model_name, output_name):
+            status, job_id = submit_job(
+                None, audio_path, model_name,  # inp_mic, inp_file, model
+                pitch, f0_method,
+                index_rate, protect, vol_env,
+                clean_cb, clean_strength,
+                split_cb, autotune_cb, autotune_strength,
+                filter_radius,
+                "mp3",  # formato fixo para consistência
+                reverb_cb, reverb_room, reverb_damp, reverb_wet
             )
+            # Aguarda o job terminar (polling)
+            import time
+            import re
+            job_id_match = re.search(r"[a-f0-9]{8}", job_id)
+            if not job_id_match:
+                raise Exception(f"Falha ao obter job_id: {status}")
+            job_id = job_id_match.group(0)
+            logger.info(f"Job {job_id} submetido para {output_name}")
+            # Poll até que o job esteja completo
+            while True:
+                time.sleep(1)
+                poll_status, output_file = poll_job(job_id)
+                if "completed" in poll_status.lower() or "done" in poll_status.lower():
+                    if output_file and os.path.exists(output_file):
+                        # Converte para MP3 se necessário
+                        final_output = os.path.join(tmp_dir, output_name)
+                        subprocess.run([
+                            "ffmpeg", "-y", "-i", output_file, "-acodec", "libmp3lame", "-b:a", "192k", final_output
+                        ], check=True, capture_output=True)
+                        return final_output
+                    else:
+                        raise Exception("Job concluído mas nenhum arquivo foi gerado.")
+                elif "failed" in poll_status.lower():
+                    raise Exception(f"Job {job_id} falhou: {poll_status}")
+        # Executa as duas conversões em paralelo para maior eficiência
+        from concurrent.futures import ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            future_original = executor.submit(run_rvc_conversion, base_audio, model, "saida.mp3")
+            future_acapella = executor.submit(run_rvc_conversion, entrada_acapella, model, "saida_acapella.mp3")
+            saida_path = future_original.result()
+            saida_acapella_path = future_acapella.result()
+        # Os arquivos já estão em MP3, prontos para serem retornados
+        # Precisamos copiá-los para fora do diretório temporário, ou o Gradio não conseguirá acessá-los.
+        # Vamos usar o diretório de saída permanente do espaço.
+        output_dir = Path("outputs")
+        output_dir.mkdir(exist_ok=True)
+        # Nomes finais
+        final_files = {
+            "entrada_acapella.mp3": entrada_acapella,
+            "entrada.mp3": base_audio,
+            "entrada_instrumental.mp3": entrada_instrumental,
+            "saida.mp3": saida_path,
+            "saida_acapella.mp3": saida_acapella_path,
+        }
+        # Copia para o diretório de saída
+        final_paths = {}
+        for name, src_path in final_files.items():
+            dest = output_dir / name
+            # Se o arquivo já existe, remove para garantir uma cópia nova
+            if dest.exists():
+                dest.unlink()
+            # Copia o arquivo (usando shutil para preservar metadados)
+            import shutil
+            shutil.copy2(src_path, dest)
+            final_paths[name] = str(dest)
+        status_msg = "✅ Conversão concluída com sucesso! Os 5 arquivos estão disponíveis abaixo."
+        return (
+            status_msg,
+            final_paths["entrada_acapella.mp3"],
+            final_paths["entrada.mp3"],
+            final_paths["entrada_instrumental.mp3"],
+            final_paths["saida.mp3"],
+            final_paths["saida_acapella.mp3"],
+            "processamento-finalizado"
+        )
+# ── Gradio UI (modificada) ───────────────────────────────────────────────────
+with gr.Blocks(title="RVC Voice Conversion - Full Suite", delete_cache=(3600, 3600)) as demo:
+    gr.HTML(f"""
+    <div id="header">
+        <h1>🎙️ RVC Voice Conversion - Full Suite</h1>
+        <p>Conversão de voz com suporte a vídeos, extração de acapella/instrumental e 5 saídas!</p>
     </div>
+    <p id="status">{startup_status}</p>
     """)
     with gr.Tabs():
+        with gr.Tab("🎤 Convert"):
+            # Layout: the first column holds the input sources, model choice and basic
+            # settings; the second column holds advanced settings, the output players
+            # and the manual job-status check.
             with gr.Row():
                 with gr.Column(scale=1):
+                    gr.Markdown("### 🔊 Entrada de Áudio/Vídeo")
+                    # NEW: video upload component
+                    video_input = gr.Video(
+                        label="Upload de Vídeo (MP4, WebM, etc.)",
+                        sources=["upload"],
+                        format="mp4",
+                        height=300,
+                    )
+                    gr.Markdown("--- OU ---")
                     with gr.Tabs():
                         with gr.Tab("🎙️ Microfone"):
+                            inp_mic = gr.Audio(
+                                sources=["microphone"],
+                                type="filepath",
+                                label="Gravar Áudio",
+                            )
+                        with gr.Tab("📁 Upload de Áudio"):
+                            inp_file = gr.Audio(
+                                sources=["upload"],
+                                type="filepath",
+                                label="Enviar Arquivo (wav, mp3, flac, etc.)",
+                            )
+                    gr.Markdown("### 🤖 Modelo")
+                    # initial_models / initial_value are defined elsewhere in the file.
+                    model_dd = gr.Dropdown(
+                        choices=initial_models,
+                        value=initial_value,
+                        label="Modelo de Voz Ativo",
+                        interactive=True,
+                    )
+                    gr.Markdown("### 🎚️ Configurações Básicas")
+                    pitch_sl = gr.Slider(
+                        minimum=-24, maximum=24, value=0, step=1,
+                        label="Pitch Shift (semitons)",
+                        info="0 = sem alteração · positivo = mais agudo · negativo = mais grave",
+                    )
+                    f0_radio = gr.Radio(
+                        choices=["rmvpe", "fcpe", "crepe", "crepe-tiny"],
+                        value="rmvpe",
+                        label="Método de Extração de Pitch",
+                        info="rmvpe = mais rápido · crepe = maior qualidade (mais lento)",
+                    )
                 with gr.Column(scale=1):
+                    gr.Markdown("### ⚙️ Configurações Avançadas")
+                    with gr.Accordion("Expandir opções avançadas", open=False):
+                        index_rate_sl = gr.Slider(
+                            0.0, 1.0, value=0.75, step=0.05,
+                            label="Index Rate",
+                            info="Força com que o índice FAISS influencia o timbre (0 = desligado)",
+                        )
+                        protect_sl = gr.Slider(
+                            0.0, 0.5, value=0.5, step=0.01,
+                            label="Proteção de Consoantes",
+                            info="0.5 = proteção máxima",
+                        )
+                        filter_radius_sl = gr.Slider(
+                            0, 7, value=3, step=1,
+                            label="Raio do Filtro de Respiração",
+                            info="Valores mais altos suavizam mais, reduzindo ruído de respiração",
+                        )
+                        vol_env_sl = gr.Slider(
+                            0.0, 1.0, value=0.25, step=0.05,
+                            label="Mistura de Envelope de Volume",
+                            info="0.25 = mistura natural · 1 = mantém volume original · 0 = saída do modelo",
+                        )
+                        with gr.Row():
+                            clean_cb = gr.Checkbox(value=False, label="Redução de Ruído")
+                            clean_sl = gr.Slider(
+                                0.0, 1.0, value=0.5, step=0.05,
+                                label="Intensidade",
+                            )
+                        with gr.Row():
+                            split_cb = gr.Checkbox(value=False, label="Dividir Áudio Longo")
+                            autotune_cb = gr.Checkbox(value=False, label="Autotune")
+                            # The strength slider starts hidden; toggle_autotune (defined
+                            # elsewhere) updates it whenever the checkbox changes —
+                            # presumably toggling its visibility; confirm.
+                            autotune_sl = gr.Slider(
+                                0.0, 1.0, value=1.0, step=0.05,
+                                label="Intensidade do Autotune",
+                                visible=False,
+                            )
+                            autotune_cb.change(
+                                fn=toggle_autotune,
+                                inputs=autotune_cb,
+                                outputs=autotune_sl,
+                            )
+                    gr.Markdown("**🎛️ Reverb**")
+                    reverb_cb = gr.Checkbox(value=False, label="Habilitar Reverb")
+                    # The reverb sliders live in a group that is hidden until the checkbox is ticked.
+                    with gr.Group(visible=False) as reverb_group:
+                        reverb_room_sl = gr.Slider(
+                            0.0, 1.0, value=0.15, step=0.05,
+                            label="Tamanho da Sala",
+                        )
+                        reverb_damp_sl = gr.Slider(
+                            0.0, 1.0, value=0.7, step=0.05,
+                            label="Atenuação",
+                        )
+                        reverb_wet_sl = gr.Slider(
+                            0.0, 1.0, value=0.15, step=0.05,
+                            label="Nível Úmido",
+                        )
+                    # Show or hide the whole reverb group to mirror the checkbox state.
+                    reverb_cb.change(
+                        fn=lambda v: gr.update(visible=v),
+                        inputs=reverb_cb,
+                        outputs=reverb_group,
+                    )
+                    # NOTE(review): the visible part of process_full_pipeline submits its RVC
+                    # jobs with a fixed "mp3" format and encodes every output as MP3 — confirm
+                    # whether this selection is honoured anywhere.
+                    fmt_radio = gr.Radio(
+                        choices=["WAV", "MP3", "FLAC", "OPUS"],
+                        value="MP3",
+                        label="Formato de Saída",
+                        info="Para este pipeline, o formato MP3 é recomendado.",
+                    )
+                    convert_btn = gr.Button(
+                        "🚀 Iniciar Processamento Completo",
+                        variant="primary",
+                    )
+                    gr.Markdown("### 🎧 5 Saídas de Áudio")
+                    out_status = gr.Markdown(value="")
+                    # Five read-only players: three for the untouched input stems, two for RVC results.
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Markdown("#### 🎤 Entrada Original (sem RVC)")
+                            entrada_acapella = gr.Audio(label="Acapella Extraído", type="filepath", interactive=False)
+                            entrada_audio = gr.Audio(label="Áudio Original", type="filepath", interactive=False)
+                            entrada_instrumental = gr.Audio(label="Instrumental Extraído", type="filepath", interactive=False)
+                        with gr.Column():
+                            gr.Markdown("#### 🎙️ Saída com RVC")
+                            saida_audio = gr.Audio(label="Saída (RVC sobre áudio original)", type="filepath", interactive=False)
+                            saida_acapella = gr.Audio(label="Saída (RVC sobre acapella)", type="filepath", interactive=False)
+                    gr.Markdown("#### 🔍 Verificar Status do Job")
+                    # Manual status lookup for a job id typed (or filled in) by the user.
+                    with gr.Row():
+                        job_id_box = gr.Textbox(
+                            label="Job ID",
+                            placeholder="ex: a3f2b1c9",
+                            scale=3,
+                        )
+                        poll_btn = gr.Button("🔄 Verificar", scale=1)
+                    poll_status = gr.Markdown(value="")
+                    poll_audio = gr.Audio(label="Resultado", type="filepath", interactive=False)
+        # As outras abas permanecem EXATAMENTE como estavam (Models, Jobs, Help)
+        with gr.Tab("📦 Models"):
+            # Models tab: upload a custom model archive and list the available models.
+            # ... (original content) ...
+            gr.Markdown("""
+            ### Upload de Modelo Customizado
+            Forneça um **`.zip`** contendo:
+            - **`model.pth`** — pesos (obrigatório)
+            - **`model.index`** — índice FAISS (opcional)
+            **Modelos integrados** (pré-baixados):
+            Vestia Zeta v1 · Vestia Zeta v2 · Ayunda Risu · Gawr Gura
+            """)
+            with gr.Row():
+                with gr.Column(scale=1):
+                    # Only .zip files can be selected in the upload widget.
+                    up_zip = gr.File(label="ZIP do Modelo", file_types=[".zip"])
+                    up_name = gr.Textbox(
+                        label="Nome do Modelo",
+                        placeholder="Deixe em branco para usar o nome do arquivo",
                     )
+                    up_btn = gr.Button("📤 Carregar Modelo", variant="primary")
+                    up_status = gr.Textbox(label="Status", interactive=False, lines=2)
+                with gr.Column(scale=1):
+                    gr.Markdown("### Modelos Carregados")
+                    # Single-column, read-only table seeded with one row per initial model.
+                    models_table = gr.Dataframe(
+                        col_count=(1, "fixed"),
+                        value=[[m] for m in initial_models],
+                        interactive=False,
+                        label="",
+                    )
+                    refresh_btn = gr.Button("🔄 Atualizar")
+            # upload_model (defined elsewhere) feeds three outputs: the status box, the
+            # model dropdown on the Convert tab, and the models table.
+            up_btn.click(
+                fn=upload_model,
+                inputs=[up_zip, up_name],
+                outputs=[up_status, model_dd, models_table],
+            )
+            # refresh_models (defined elsewhere) takes no inputs; note its outputs are
+            # ordered table first, dropdown second — the reverse of upload_model's tail.
+            refresh_btn.click(
+                fn=refresh_models,
+                outputs=[models_table, model_dd],
+            )
+        with gr.Tab("📋 Jobs"):
+            # ... (conteúdo original) ...
+            gr.Markdown("Todos os jobs submetidos, do mais novo ao mais antigo. Clique em **Atualizar**.")
+            queue_status = gr.Markdown(value=get_queue_info, every=10)
+            jobs_table = gr.Dataframe(
+                headers=["Job ID", "Model", "Status", "Time", "Download"],
+                col_count=(5, "fixed"),
+                value=get_jobs_table,
+                interactive=False,
+                wrap=True,
+                datatype=["str", "str", "str", "str", "markdown"],
+                every=10,
+            )
+            refresh_jobs_btn = gr.Button("🔄 Atualizar")
+            def _refresh_jobs():
+                return get_queue_info(), get_jobs_table()
+            refresh_jobs_btn.click(fn=_refresh_jobs, outputs=[queue_status, jobs_table])
+        with gr.Tab("ℹ️ Help"):
+            # Help tab: static usage guide rendered from one Markdown f-string.
+            # ... (original content) ...
+            # The f-string interpolates DEVICE_LABEL and MAX_INPUT_DURATION // 60, both
+            # defined elsewhere in the file.
+            # NOTE(review): the "Configurações Avançadas" table names several parameters in
+            # English, while the matching controls on the Convert tab carry Portuguese labels.
+            gr.Markdown(f"""
+            ## Como Funciona
+            RVC (Retrieval-Based Voice Conversion) transforma uma gravação de voz para soar como um falante alvo, utilizando o modelo desse falante.
+            ---
+            ## Guia Rápido
+            1. Abra a aba **Convert**
+            2. Escolha uma fonte: **vídeo (MP4)**, **microfone** ou **upload de áudio**
+            3. Selecione um **modelo** no dropdown
+            4. Ajuste o **Pitch Shift** se necessário (ex: voz masculina → feminina: tente +12 semitons)
+            5. Clique em **Iniciar Processamento Completo** e aguarde
+            6. Os 5 arquivos de saída aparecerão na tela para ouvir e baixar
+            ---
+            ## Novidades na Versão Full Suite
+            * **Upload de vídeo**: Suporte a MP4 e outros formatos de vídeo (o áudio é extraído automaticamente).
+            * **Separação de faixas**: Usando Demucs, extraímos o acapella e o instrumental da sua entrada.
+            * **5 saídas**:
+                * `entrada_acapella.mp3` (vocal extraído)
+                * `entrada.mp3` (áudio original)
+                * `entrada_instrumental.mp3` (instrumental extraído)
+                * `saida.mp3` (RVC aplicado ao áudio original)
+                * `saida_acapella.mp3` (RVC aplicado apenas ao vocal)
+            * **Pipeline otimizado**: Processamento em paralelo para maior velocidade.
+            ---
+            ## Modelos Integrados
+            | Modelo | Descrição |
+            |---|---|
+            | **Vestia Zeta v1** | VTuber da Hololive ID, modelo v1 |
+            | **Vestia Zeta v2** | VTuber da Hololive ID, modelo v2 (recomendado) |
+            | **Ayunda Risu** | VTuber da Hololive ID |
+            | **Gawr Gura** | VTuber da Hololive EN |
+            ---
+            ## Métodos de Extração de Pitch
+            | Método | Velocidade | Qualidade | Melhor para |
+            |---|---|---|---|
+            | **rmvpe** | ⚡⚡⚡ | ★★★★ | Uso geral (padrão) |
+            | **fcpe** | ⚡⚡ | ★★★★ | Canto |
+            | **crepe** | ⚡ | ★★★★★ | Máxima qualidade, mais lento |
+            | **crepe-tiny** | ⚡⚡ | ★★★ | Baixo recurso |
+            ---
+            ## Configurações Avançadas
+            | Parâmetro | Descrição |
+            |---|---|
+            | **Index Rate** | Influência do índice FAISS (0.75 recomendado) |
+            | **Protect Consonants** | Protege consoantes não vozeadas (0.5 = máximo) |
+            | **Respiration Filter Radius** | Suaviza a curva de pitch — valores maiores reduzem ruído de respiração |
+            | **Volume Envelope Mix** | 0.25 = mistura natural · 1 = mantém volume original |
+            | **Noise Reduction** | Remove ruído de fundo antes da conversão |
+            | **Split Long Audio** | Divide áudios longos (>60s) em segmentos |
+            | **Autotune** | Ajusta o pitch para a nota musical mais próxima |
+            ---
+            **Dispositivo:** `{DEVICE_LABEL}`
+            **Duração máxima de entrada:** {MAX_INPUT_DURATION // 60} minutos
+            ---
+            ## Créditos
+            Engine: [Ultimate RVC](https://github.com/JackismyShephard/ultimate-rvc) · Separação: [Demucs](https://github.com/facebookresearch/demucs)
+            """)
+    # --- Event wiring (new conversion button) ---
+    # The input components are handed to process_full_pipeline in list order, so the
+    # order below must match that function's parameters (signature not visible here —
+    # verify before reordering).
+    # The seven outputs line up with the 7-tuple the pipeline returns: a status message,
+    # five audio paths, and a final string that lands in the Job ID box.
+    # NOTE(review): on success that final string is the literal "processamento-finalizado",
+    # not a real job id, so pressing the poll button afterwards will look up that text.
     convert_btn.click(
+        fn=process_full_pipeline,
+        inputs=[
+            video_input, inp_mic, inp_file, model_dd,
+            pitch_sl, f0_radio,
+            index_rate_sl, protect_sl, vol_env_sl,
+            clean_cb, clean_sl,
+            split_cb, autotune_cb, autotune_sl,
+            filter_radius_sl,
+            fmt_radio,
+            reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl,
+        ],
+        outputs=[
+            out_status,
+            entrada_acapella, entrada_audio, entrada_instrumental,
+            saida_audio, saida_acapella,
+            job_id_box
+        ],
     )
+    def _poll_and_refresh(job_id):
+        status, file = poll_job(job_id)
+        return status, file, get_queue_info(), get_jobs_table()
+    poll_btn.click(
+        fn=_poll_and_refresh,
+        inputs=[job_id_box],
+        outputs=[poll_status, poll_audio, queue_status, jobs_table],
     )
+# ── Launch ────────────────────────────────────────────────────────────────────
 if __name__ == "__main__":
+    demo.queue(default_concurrency_limit=5)
     demo.launch(
         server_name="0.0.0.0",
+        server_port=int(os.getenv("PORT", 7860)),
+        max_threads=10,
+        ssr_mode=False,
+        css=CSS,
     )