baenacoco committed on
Commit
2527849
·
verified ·
1 Parent(s): e291f1d

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +5 -6
  2. app.py +573 -0
  3. hub_utils.py +64 -0
  4. packages.txt +6 -0
  5. requirements.txt +19 -0
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Talking Head Generate
3
- emoji: 📊
4
- colorFrom: blue
5
  colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Talking Head - Generate
3
+ emoji: 🎬
4
+ colorFrom: red
5
  colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: 5.9.1
8
  app_file: app.py
9
  pinned: false
10
+ hardware: a100-large
11
  ---
 
 
app.py ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Space 5: Generate Video (F5-TTS + Flux.1 + MuseTalk)
2
+
3
+ Downloads trained models from Hub -> TTS -> Image gen -> Lip-sync -> saves video to Hub.
4
+ GPU: A100 (Flux.1 image gen + MuseTalk lip-sync)
5
+ """
6
+ import gc
7
+ import json
8
+ import logging
9
+ import os
10
+ import shutil
11
+ import subprocess
12
+ import sys
13
+ import traceback
14
+ from pathlib import Path
15
+
16
+ import gradio as gr
17
+ import numpy as np
18
+ import soundfile as sf
19
+ import torch
20
+
21
+ from hub_utils import download_step, upload_step
22
+
23
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
24
+ logger = logging.getLogger(__name__)
25
+
26
# ── Config ──
# Running inside a Hugging Face Space is detected via SPACE_ID (set by the platform).
IS_HF_SPACE = os.environ.get("SPACE_ID") is not None
_data_path = Path("/data")
# Prefer the persistent /data volume when it exists and is writable;
# otherwise fall back to a relative ./data directory.
if IS_HF_SPACE and _data_path.exists() and os.access(_data_path, os.W_OK):
    BASE_DIR = _data_path
else:
    BASE_DIR = Path("data")

VOICE_MODEL_DIR = BASE_DIR / "voice_model"    # fine-tuned F5-TTS checkpoint + reference.wav
LORA_MODEL_DIR = BASE_DIR / "lora_model"      # Flux LoRA weights + lora_config.json
GENERATED_VIDEO_DIR = BASE_DIR / "generated"  # final rendered videos
TEMP_DIR = BASE_DIR / "temp"                  # scratch space for intermediate files
HF_CACHE_DIR = BASE_DIR / "hf_cache"          # Hugging Face download cache

for d in [VOICE_MODEL_DIR, LORA_MODEL_DIR, GENERATED_VIDEO_DIR, TEMP_DIR, HF_CACHE_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# Point HF caches at the (possibly persistent) base dir before any model download.
os.environ["HF_HOME"] = str(HF_CACHE_DIR)
os.environ["TRANSFORMERS_CACHE"] = str(HF_CACHE_DIR)

FLUX_MODEL_ID = "black-forest-labs/FLUX.1-dev"
F5_SPANISH_MODEL_ID = "jpgallegoar/F5-Spanish"
MUSETALK_REPO_ID = "TMElyralab/MuseTalk"
LORA_TRIGGER_WORD = "alvaro_person"  # token the LoRA was trained on; prepended to prompts

# Generation defaults (several are surfaced as UI sliders below).
IMAGE_WIDTH = 1024
IMAGE_HEIGHT = 1024
IMAGE_STEPS = 30
IMAGE_GUIDANCE = 3.5
TTS_SPEED = 1.0
MUSETALK_FPS = 30
MUSETALK_BBOX_SHIFT = 5
CHUNK_DURATION_S = 10        # target lip-sync chunk length for long audio (seconds)
CROSSFADE_DURATION_S = 0.5   # video crossfade between chunks (seconds)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
APP_VERSION = "1.0.0"

# ── Model state ──  (lazily loaded; loaders unload the other model first)
_f5_model = None
_flux_pipe = None
MUSETALK_DIR = Path("musetalk_repo")  # local clone of the MuseTalk code repo
68
+
69
+
70
+ def _clear_cache():
71
+ gc.collect()
72
+ if torch.cuda.is_available():
73
+ torch.cuda.empty_cache()
74
+ torch.cuda.synchronize()
75
+
76
+
77
def _unload_all():
    """Drop both the TTS model and the Flux pipeline, then reclaim GPU memory."""
    global _f5_model, _flux_pipe
    # Rebinding to None releases the last reference; the explicit del in the
    # original was redundant with the reassignment.
    _f5_model = None
    _flux_pipe = None
    _clear_cache()
86
+
87
+
88
+ # ── FFmpeg utils ──
89
+
90
+ def _ffmpeg_run(cmd, description):
91
+ result = subprocess.run(cmd, capture_output=True, text=True)
92
+ if result.returncode != 0:
93
+ raise RuntimeError(f"FFmpeg failed ({description}): {result.stderr[-500:]}")
94
+
95
+
96
def _get_duration(file_path):
    """Return the media duration of *file_path* in seconds, as reported by ffprobe."""
    probe = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            file_path,
        ],
        capture_output=True, text=True, check=True,
    )
    return float(probe.stdout.strip())
101
+
102
+
103
def _concat_videos(video_paths, output_path):
    """Losslessly join clips with ffmpeg's concat demuxer (stream copy, no re-encode)."""
    manifest = Path(output_path).parent / "concat_list.txt"
    manifest.write_text("".join(f"file '{vp}'\n" for vp in video_paths))
    cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(manifest), "-c", "copy", output_path]
    _ffmpeg_run(cmd, "concat")
    manifest.unlink(missing_ok=True)
110
+
111
+
112
def _crossfade_videos(v1, v2, output, duration=0.5):
    """Blend the tail of *v1* into the head of *v2* with an xfade of *duration* seconds.

    The fade starts `duration` seconds before the end of v1; output is re-encoded
    with libx264 (video only — audio is muxed separately by the caller).
    """
    offset = _get_duration(v1) - duration
    fade = f"[0:v][1:v]xfade=transition=fade:duration={duration}:offset={offset}[v]"
    cmd = [
        "ffmpeg", "-y", "-i", v1, "-i", v2,
        "-filter_complex", fade,
        "-map", "[v]", "-c:v", "libx264", "-pix_fmt", "yuv420p", output,
    ]
    _ffmpeg_run(cmd, "crossfade")
120
+
121
+
122
def _mux_audio_video(video, audio, output):
    """Combine a video stream with an audio track (AAC 192k), trimming to the shorter."""
    cmd = [
        "ffmpeg", "-y", "-i", video, "-i", audio,
        "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
        "-map", "0:v:0", "-map", "1:a:0", "-shortest", output,
    ]
    _ffmpeg_run(cmd, "mux")
128
+
129
+
130
+ # ── TTS ──
131
+
132
def _load_tts():
    """Load the F5-TTS model, preferring a fine-tuned checkpoint over the base model.

    Lookup order: model_last.pt in VOICE_MODEL_DIR, then any *.pt / *.safetensors
    there, finally the base F5-Spanish model from the Hub. Idempotent; unloads
    everything else first to free VRAM.
    """
    global _f5_model
    if _f5_model is not None:
        return

    _unload_all()
    from f5_tts.api import F5TTS

    ckpt = VOICE_MODEL_DIR / "model_last.pt"
    if not ckpt.exists():
        found = [*VOICE_MODEL_DIR.glob("*.pt"), *VOICE_MODEL_DIR.glob("*.safetensors")]
        ckpt = found[0] if found else None

    if ckpt and ckpt.exists():
        logger.info(f"Loading fine-tuned F5-TTS from {ckpt}")
        _f5_model = F5TTS(model_path=str(ckpt), device=DEVICE)
    else:
        logger.info(f"Loading base F5-Spanish from {F5_SPANISH_MODEL_ID}")
        _f5_model = F5TTS(model_name=F5_SPANISH_MODEL_ID, device=DEVICE)
    logger.info("F5-TTS loaded")
152
+
153
+
154
def _get_reference_audio():
    """Return the path to the speaker reference WAV.

    Raises FileNotFoundError when the voice model has not been downloaded yet.
    """
    ref_path = VOICE_MODEL_DIR / "reference.wav"
    if not ref_path.exists():
        raise FileNotFoundError("No reference audio found. Download voice model first.")
    return str(ref_path)
159
+
160
+
161
def generate_speech(text, output_path=None):
    """Synthesize *text* with the loaded F5-TTS voice and write it as a WAV file.

    When output_path is None, writes to TEMP_DIR/tts_output.wav. Returns the path.
    """
    _load_tts()
    ref = _get_reference_audio()
    if output_path is None:
        output_path = str(TEMP_DIR / "tts_output.wav")
    wav, sample_rate = _f5_model.infer(ref_file=ref, ref_text="", gen_text=text, speed=TTS_SPEED)
    sf.write(output_path, wav, sample_rate)
    logger.info(f"Generated speech: {output_path} ({len(wav)/sample_rate:.1f}s)")
    return output_path
170
+
171
+
172
def _unload_tts():
    """Release the F5-TTS model and reclaim GPU memory."""
    global _f5_model
    # Rebinding to None drops the last reference (the original's del + assign
    # was equivalent); the cache is cleared either way.
    _f5_model = None
    _clear_cache()
178
+
179
+
180
+ # ── Image generation ──
181
+
182
def _load_flux():
    """Load the Flux.1 text-to-image pipeline, plus LoRA weights if downloaded.

    Idempotent: returns immediately when the pipeline is already resident.
    Unloads the TTS model first so the large Flux weights fit in VRAM.
    """
    global _flux_pipe
    if _flux_pipe is not None:
        return

    _unload_tts()

    from diffusers import FluxPipeline

    logger.info(f"Loading Flux.1 from {FLUX_MODEL_ID}...")
    _flux_pipe = FluxPipeline.from_pretrained(
        FLUX_MODEL_ID, torch_dtype=torch.bfloat16,
        token=os.environ.get("HF_TOKEN"),  # FLUX.1-dev is a gated repo
    ).to(DEVICE)

    # LoRA may be shipped as *.safetensors or PEFT-style adapter_model.* files.
    lora_weights = list(LORA_MODEL_DIR.glob("*.safetensors"))
    if not lora_weights:
        lora_weights = list(LORA_MODEL_DIR.glob("adapter_model.*"))
    if lora_weights:
        try:
            _flux_pipe.load_lora_weights(str(LORA_MODEL_DIR))
            logger.info("LoRA weights loaded")
        except Exception as e:
            # Best effort: fall back to the base model when the LoRA is incompatible.
            logger.warning(f"Could not load LoRA: {e}")

    # NOTE(review): .to(DEVICE) followed by enable_model_cpu_offload() — offload
    # moves idle sub-modules back to CPU; confirm the explicit .to() is intended.
    _flux_pipe.enable_model_cpu_offload()
    logger.info("Flux.1 pipeline loaded")
209
+
210
+
211
def _unload_flux():
    """Release the Flux.1 pipeline and reclaim GPU memory."""
    global _flux_pipe
    # Rebinding to None drops the last reference; equivalent to the original's
    # del + assign pair.
    _flux_pipe = None
    _clear_cache()
217
+
218
+
219
def generate_image(prompt, num_steps, guidance_scale, seed, output_path=None):
    """Render the avatar image with Flux.1 (+ LoRA if loaded).

    Reads the LoRA trigger word from lora_config.json when present (falling back
    to LORA_TRIGGER_WORD) and prepends it to the prompt if missing. A seed >= 0
    makes generation deterministic. Returns the saved PNG path.
    """
    _load_flux()

    trigger = LORA_TRIGGER_WORD
    cfg_file = LORA_MODEL_DIR / "lora_config.json"
    if cfg_file.exists():
        with open(cfg_file) as fh:
            trigger = json.load(fh).get("trigger_word", LORA_TRIGGER_WORD)

    if trigger and trigger not in prompt:
        prompt = f"{trigger}, {prompt}"

    rng = torch.Generator(device=DEVICE).manual_seed(seed) if seed >= 0 else None

    if output_path is None:
        output_path = str(TEMP_DIR / "generated_avatar.png")

    out = _flux_pipe(
        prompt=prompt,
        width=IMAGE_WIDTH,
        height=IMAGE_HEIGHT,
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        generator=rng,
    )
    out.images[0].save(output_path)
    logger.info(f"Image saved: {output_path}")
    return output_path
246
+
247
+
248
+ # ── MuseTalk lip-sync ──
249
+
250
def _ensure_mm_packages():
    """Install the OpenMMLab stack via `mim` when mmcv is not importable.

    Best effort: install failures are ignored here (output is captured and the
    return code is not checked); the MuseTalk import will surface real problems.
    """
    try:
        import mmcv  # noqa: F401
        return
    except ImportError:
        pass
    logger.info("Installing mmcv, mmdet, mmpose via mim...")
    specs = ("mmengine", "mmcv>=2.0.0", "mmdet>=3.1.0", "mmpose>=1.1.0")
    for spec in specs:
        subprocess.run(
            [sys.executable, "-m", "mim", "install", spec],
            capture_output=True, text=True, timeout=600,
        )
260
+
261
+
262
def _ensure_musetalk():
    """Make the MuseTalk code and model weights available locally.

    Installs the OpenMMLab prerequisites, clones the GitHub repo on first use
    (falling back to the HF snapshot when git clone fails), then fetches weights.
    """
    _ensure_mm_packages()
    if not MUSETALK_DIR.exists():
        logger.info("Cloning MuseTalk repository...")
        try:
            clone_cmd = ["git", "clone", "https://github.com/TMElyralab/MuseTalk.git", str(MUSETALK_DIR)]
            subprocess.run(clone_cmd, capture_output=True, text=True, timeout=300, check=True)
        except Exception:
            # git missing or clone failed: mirror the repo from the Hub instead.
            from huggingface_hub import snapshot_download
            snapshot_download(repo_id=MUSETALK_REPO_ID, local_dir=str(MUSETALK_DIR), repo_type="model")
    _download_musetalk_models()
275
+
276
+
277
def _download_musetalk_models():
    """Fetch the MuseTalk model weights from the Hub into MUSETALK_DIR.

    Files already present on disk are skipped. Download failures are logged and
    tolerated (best effort) — a later inference step will fail loudly if a
    required file is actually missing.
    """
    from huggingface_hub import hf_hub_download
    models = [
        ("TMElyralab/MuseTalk", "models/musetalk/musetalk.json"),
        ("TMElyralab/MuseTalk", "models/musetalk/pytorch_model.bin"),
        ("TMElyralab/MuseTalk", "models/dwpose/dw-ll_ucoco_384.onnx"),
        ("TMElyralab/MuseTalk", "models/face-parse-bisenet/79999_iter.pth"),
        ("TMElyralab/MuseTalk", "models/sd-vae-ft-mse/config.json"),
        ("TMElyralab/MuseTalk", "models/sd-vae-ft-mse/diffusion_pytorch_model.bin"),
        ("TMElyralab/MuseTalk", "models/whisper/tiny.pt"),
    ]
    for repo_id, filename in models:
        local_path = MUSETALK_DIR / filename
        if not local_path.exists():
            try:
                hf_hub_download(repo_id=repo_id, filename=filename, local_dir=str(MUSETALK_DIR))
            except Exception as e:
                # Fix: report WHICH file failed — the original logged "(unknown)".
                logger.warning(f"Could not download {filename}: {e}")
295
+
296
+
297
def _generate_lipsync(image_path, audio_path, output_path, bbox_shift):
    """Run MuseTalk lip-sync on a still image + audio clip, writing *output_path*.

    Tries the in-process Python API first; on any failure falls back to the
    repo's CLI inference script as a subprocess. All other models are unloaded
    first to free GPU memory.

    Raises RuntimeError when the CLI fallback fails or produces no video.
    """
    _unload_all()
    _ensure_musetalk()

    # Try Python API
    try:
        sys.path.insert(0, str(MUSETALK_DIR))
        from musetalk.models.musetalk import MuseTalk
        model = MuseTalk()
        model.load_model(str(MUSETALK_DIR / "models"))
        # NOTE(review): a still image is passed as video_path — presumably
        # MuseTalk accepts an image and animates it; confirm against its API.
        result = model.inference(
            video_path=image_path, audio_path=audio_path,
            bbox_shift=bbox_shift, result_dir=str(Path(output_path).parent),
        )
        if result and Path(result).exists():
            if str(result) != output_path:
                shutil.move(result, output_path)
            return output_path
    except Exception as e:
        logger.warning(f"Python MuseTalk failed: {e}, trying CLI...")

    # Fallback to CLI
    result_dir = TEMP_DIR / "musetalk_output"
    result_dir.mkdir(parents=True, exist_ok=True)
    cmd = [
        sys.executable, "-m", "scripts.inference",
        "--video_path", image_path, "--audio_path", audio_path,
        "--bbox_shift", str(bbox_shift), "--result_dir", str(result_dir),
        "--fps", str(MUSETALK_FPS), "--batch_size", "8",
    ]
    env = os.environ.copy()
    # Make the cloned repo importable by the inference script.
    env["PYTHONPATH"] = str(MUSETALK_DIR) + ":" + env.get("PYTHONPATH", "")
    proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(MUSETALK_DIR), env=env, timeout=1800)
    if proc.returncode != 0:
        raise RuntimeError(f"MuseTalk failed: {proc.stderr[-500:]}")
    # The newest .mp4 anywhere under result_dir is taken as the generated clip.
    outputs = sorted(result_dir.glob("**/*.mp4"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not outputs:
        raise RuntimeError("MuseTalk did not produce output")
    shutil.move(str(outputs[0]), output_path)
    shutil.rmtree(result_dir, ignore_errors=True)
    return output_path
338
+
339
+
340
+ # ── Video composition ──
341
+
342
def _find_silence_boundaries(audio, sr, chunk_duration):
    """Choose split times (seconds) near each chunk_duration mark, snapped to silences.

    Writes the samples to a temp WAV so pydub can run silence detection, then
    greedily picks, for each target time, the silence midpoint closest to it
    (constrained to be at least 3 s past the previous cut and 1 s before the
    end); when no silence qualifies the exact target time is used. The returned
    list always starts at 0.0 and ends at the total duration.
    """
    from pydub import AudioSegment
    from pydub.silence import detect_silence
    temp_path = str(TEMP_DIR / "_temp_silence.wav")
    sf.write(temp_path, audio, sr)
    sound = AudioSegment.from_wav(temp_path)
    # [start_ms, end_ms] pairs of runs >= 300 ms quieter than -35 dBFS.
    silences = detect_silence(sound, min_silence_len=300, silence_thresh=-35)
    total_duration = len(audio) / sr
    boundaries = [0.0]
    current = 0.0
    while current + chunk_duration < total_duration:
        target = current + chunk_duration
        best_split = target
        best_dist = float("inf")
        for start_ms, end_ms in silences:
            mid = (start_ms + end_ms) / 2000.0  # silence midpoint, ms -> s
            if current + 3.0 < mid < total_duration - 1.0:
                dist = abs(mid - target)
                if dist < best_dist:
                    best_dist = dist
                    best_split = mid
        boundaries.append(best_split)
        current = best_split
    boundaries.append(total_duration)
    Path(temp_path).unlink(missing_ok=True)
    return boundaries
368
+
369
+
370
def compose_long_video(image_path, audio_path, output_path, bbox_shift, progress_callback=None):
    """Lip-sync possibly-long audio by chunking at silences and stitching the clips.

    Short audio (<= 1.5x CHUNK_DURATION_S) gets a single MuseTalk pass. Longer
    audio is split at silence boundaries, each chunk lip-synced separately, the
    chunk videos crossfaded (or hard-concatenated as fallback), and the original
    full audio muxed back over the joined video. progress_callback, if given,
    receives (fraction, message) updates. Returns output_path.
    """
    audio, sr = sf.read(audio_path)
    if audio.ndim > 1:
        audio = audio.mean(axis=1)  # downmix to mono
    total_duration = len(audio) / sr

    # Single-pass fast path for short clips.
    if total_duration <= CHUNK_DURATION_S * 1.5:
        if progress_callback:
            progress_callback(0.1, "Generando lip-sync...")
        return _generate_lipsync(image_path, audio_path, output_path, bbox_shift)

    # Fresh scratch directory for chunk WAVs / MP4s.
    work_dir = TEMP_DIR / "compose_work"
    if work_dir.exists():
        shutil.rmtree(work_dir)
    work_dir.mkdir(parents=True)

    if progress_callback:
        progress_callback(0.05, "Buscando puntos de corte...")
    boundaries = _find_silence_boundaries(audio, sr, CHUNK_DURATION_S)
    n_chunks = len(boundaries) - 1

    # Lip-sync each chunk independently (chunk generation spans 10%-80% of progress).
    chunk_videos = []
    for i in range(n_chunks):
        if progress_callback:
            progress_callback(0.1 + (i / n_chunks) * 0.7, f"Generando chunk {i+1}/{n_chunks}...")
        start_sample = int(boundaries[i] * sr)
        end_sample = int(boundaries[i + 1] * sr)
        chunk_audio = audio[start_sample:end_sample]
        chunk_audio_path = str(work_dir / f"chunk_{i:03d}.wav")
        sf.write(chunk_audio_path, chunk_audio, sr)
        chunk_video_path = str(work_dir / f"chunk_{i:03d}.mp4")
        _generate_lipsync(image_path, chunk_audio_path, chunk_video_path, bbox_shift)
        chunk_videos.append(chunk_video_path)

    if progress_callback:
        progress_callback(0.85, "Componiendo video final...")

    if len(chunk_videos) == 1:
        final_video = chunk_videos[0]
    elif CROSSFADE_DURATION_S > 0:
        # Left-fold: crossfade each next chunk onto the running result,
        # falling back to a hard cut (concat) when xfade fails.
        current = chunk_videos[0]
        for i in range(1, len(chunk_videos)):
            merged = str(work_dir / f"merged_{i:03d}.mp4")
            try:
                _crossfade_videos(current, chunk_videos[i], merged, CROSSFADE_DURATION_S)
                current = merged
            except Exception:
                _concat_videos([current, chunk_videos[i]], merged)
                current = merged
        final_video = current
    else:
        final_video = str(work_dir / "concat.mp4")
        _concat_videos(chunk_videos, final_video)

    # Re-attach the original continuous audio over the stitched (video-only) result.
    _mux_audio_video(final_video, audio_path, output_path)
    shutil.rmtree(work_dir, ignore_errors=True)
    return output_path
427
+
428
+
429
+ # ── Gradio handlers ──
430
+
431
def _fetch_model_step(name, step_folder, dest_dir):
    """Download one trained-model step from the Hub into *dest_dir*.

    Clears dest_dir, downloads {name}/{step_folder} into the BASE_DIR staging
    area, then moves the files into dest_dir. Returns True when files arrived.
    """
    if dest_dir.exists():
        shutil.rmtree(dest_dir)
    dest_dir.mkdir(parents=True)
    download_step(name, step_folder, str(BASE_DIR))
    src = BASE_DIR / name / step_folder
    if not src.exists():
        return False
    for f in src.iterdir():
        shutil.move(str(f), str(dest_dir / f.name))
    return True


def download_models_from_hub(project_name, progress=gr.Progress()):
    """Gradio handler: download the project's trained voice and LoRA models.

    Returns a Spanish status string (success with the list of downloaded model
    kinds, or an error message). The per-step logic is shared via
    _fetch_model_step instead of the previous duplicated copy-paste blocks.
    """
    if not project_name or not project_name.strip():
        return "Error: Debes introducir un nombre de proyecto"
    name = project_name.strip()
    try:
        status_parts = []

        # Voice model (fine-tuned F5-TTS checkpoint + reference audio).
        if _fetch_model_step(name, "step3_voice", VOICE_MODEL_DIR):
            status_parts.append("voz")

        # LoRA model (Flux adapter weights).
        if _fetch_model_step(name, "step4_lora", LORA_MODEL_DIR):
            status_parts.append("LoRA")

        # Remove the staging copy left by download_step.
        shutil.rmtree(BASE_DIR / name, ignore_errors=True)
        return f"OK - Descargados modelos: {', '.join(status_parts)}"
    except Exception as e:
        return f"Error: {e}"
464
+
465
+
466
def generate_video_handler(
    project_name, text, scene_prompt, bbox_shift,
    img_steps, guidance, seed, progress=gr.Progress(),
):
    """Gradio handler: run the full TTS -> image -> lip-sync pipeline.

    Returns (video_path, status_message); video_path is None on error.
    """
    if not project_name or not project_name.strip():
        return None, "Error: Debes introducir un nombre de proyecto"
    if not text.strip():
        return None, "Error: Introduce texto para hablar"

    logger.info(f"=== Video Generation Started === text='{text[:50]}...'")

    try:
        # Step 1: TTS
        progress(0.0, desc="Generando voz con TTS...")
        audio_path = generate_speech(text)

        # Step 2: Image generation
        progress(0.2, desc="Generando imagen con Flux.1 + LoRA...")
        image_path = generate_image(
            prompt=scene_prompt, num_steps=int(img_steps),
            guidance_scale=guidance, seed=int(seed),
        )

        # Unload Flux before MuseTalk (frees VRAM for the lip-sync models)
        _unload_flux()

        # Step 3: Lip-sync — the remaining 60% of the progress bar is delegated
        # to compose_long_video via the callback below.
        progress(0.4, desc="Generando lip-sync con MuseTalk...")
        output_path = str(GENERATED_VIDEO_DIR / "final_output.mp4")
        compose_long_video(
            image_path=image_path, audio_path=audio_path,
            output_path=output_path, bbox_shift=int(bbox_shift),
            progress_callback=lambda p, m: progress(0.4 + p * 0.6, desc=m),
        )

        logger.info("=== Video Generation Complete ===")
        return output_path, "OK - Video generado!"

    except Exception as e:
        logger.error(f"=== Video Generation Failed ===\n{traceback.format_exc()}")
        return None, f"Error: {e}"
507
+
508
+
509
def save_to_hub(project_name):
    """Upload every generated .mp4 to the project's step5_video folder on the Hub.

    Returns a Spanish status string (success or error); never raises.
    """
    if not project_name or not project_name.strip():
        return "Error: Debes introducir un nombre de proyecto"
    name = project_name.strip()
    if not list(GENERATED_VIDEO_DIR.glob("*.mp4")):
        return "Error: No hay video para guardar."
    try:
        return upload_step(name, "step5_video", str(GENERATED_VIDEO_DIR))
    except Exception as e:
        return f"Error: {e}"
520
+
521
+
522
# ── UI ──

with gr.Blocks(title="Talking Head - Generate", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# Talking Head - Generar Video `v{APP_VERSION}`\nTTS + Imagen + Lip-sync con modelos entrenados")

    # The project name doubles as the folder name inside the Hub dataset repo.
    project_name = gr.Textbox(
        label="Nombre del proyecto",
        placeholder="mi_proyecto",
        info="Obligatorio. Se usa como carpeta en el Hub.",
    )

    # Step 1: pull the trained voice + LoRA models for this project.
    gr.Markdown("### 1. Descargar modelos del Hub")
    download_btn = gr.Button("Descargar modelos del Hub", variant="secondary")
    download_status = gr.Textbox(label="Estado descarga", interactive=False)

    # Step 2: text + scene prompt + tuning sliders -> generated video.
    gr.Markdown("### 2. Generar video")
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Texto a hablar (espanol)",
                placeholder="Hola, soy un avatar digital hiperrealista...",
                lines=4,
            )
            scene_prompt = gr.Textbox(
                label="Prompt de escena",
                value="portrait photo, professional lighting, neutral background",
            )
            with gr.Row():
                bbox_shift = gr.Slider(-20, 20, value=MUSETALK_BBOX_SHIFT, step=1, label="Bbox Shift")
                img_steps = gr.Slider(10, 50, value=IMAGE_STEPS, step=5, label="Image Steps")
            with gr.Row():
                guidance = gr.Slider(1.0, 10.0, value=IMAGE_GUIDANCE, step=0.5, label="Guidance Scale")
                seed_input = gr.Number(value=-1, label="Seed (-1=random)")
            gen_btn = gr.Button("Generar Video", variant="primary")
        with gr.Column():
            video_output = gr.Video(label="Video generado")
            gen_status = gr.Textbox(label="Estado", interactive=False)

    # Step 3: upload the result back to the Hub dataset repo.
    gr.Markdown("### 3. Guardar video en Hub")
    save_btn = gr.Button("Guardar en Hub", variant="secondary")
    save_status = gr.Textbox(label="Estado guardado", interactive=False)

    # Wire handlers to buttons.
    download_btn.click(download_models_from_hub, inputs=[project_name], outputs=[download_status])
    gen_btn.click(
        generate_video_handler,
        inputs=[project_name, text_input, scene_prompt, bbox_shift, img_steps, guidance, seed_input],
        outputs=[video_output, gen_status],
    )
    save_btn.click(save_to_hub, inputs=[project_name], outputs=[save_status])

if __name__ == "__main__":
    # queue() serializes jobs on the single GPU; Spaces expects 0.0.0.0:7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
hub_utils.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hub utilities for uploading/downloading step data to HF Dataset repo."""
2
+ import os
3
+ import logging
4
+ from pathlib import Path
5
+ from huggingface_hub import HfApi, hf_hub_download, list_repo_tree
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ HF_DATASET_REPO_ID = "baenacoco/talking-head-avatar"
10
+
11
+
12
+ def _get_api():
13
+ token = os.environ.get("HF_TOKEN")
14
+ if not token:
15
+ raise ValueError("HF_TOKEN no encontrado en variables de entorno")
16
+ api = HfApi(token=token)
17
+ api.create_repo(repo_id=HF_DATASET_REPO_ID, repo_type="dataset", exist_ok=True)
18
+ return api
19
+
20
+
21
def upload_step(name: str, step_folder: str, local_dir: str):
    """Push a local directory to {name}/{step_folder}/ inside the dataset repo.

    Returns a Spanish confirmation string for display in the UI.
    """
    hub = _get_api()
    hub.upload_folder(
        folder_path=local_dir,
        repo_id=HF_DATASET_REPO_ID,
        repo_type="dataset",
        path_in_repo=f"{name}/{step_folder}",
    )
    logger.info(f"Uploaded {local_dir} -> {name}/{step_folder}")
    return f"Subido a Hub: {name}/{step_folder}"
32
+
33
+
34
def download_step(name: str, step_folder: str, local_dir: str):
    """Fetch {name}/{step_folder}/ from the dataset repo into *local_dir*.

    Only files matching the step's path pattern are downloaded. Returns a
    Spanish confirmation string for display in the UI.
    """
    from huggingface_hub import snapshot_download
    snapshot_download(
        repo_id=HF_DATASET_REPO_ID,
        repo_type="dataset",
        local_dir=local_dir,
        allow_patterns=[f"{name}/{step_folder}/**"],
        token=os.environ.get("HF_TOKEN"),
    )
    logger.info(f"Downloaded {name}/{step_folder} -> {local_dir}")
    return f"Descargado de Hub: {name}/{step_folder}"
47
+
48
+
49
def list_projects() -> list[str]:
    """List project names (top-level folders) in the dataset repo.

    Returns [] on any error (missing repo, network failure, bad token).
    """
    token = os.environ.get("HF_TOKEN")
    try:
        api = HfApi(token=token)
        # Only the repo root is listed; each entry is a top-level file or folder.
        entries = list(api.list_repo_tree(
            repo_id=HF_DATASET_REPO_ID, repo_type="dataset", path_in_repo="",
        ))
        # NOTE(review): entries normally expose `.path`; the `rfilename` branch
        # looks like a fallback for other hub versions — confirm which attribute
        # applies, since `hasattr(e, "path")` likely makes the filter a no-op.
        return sorted(set(
            e.rfilename.split("/")[0] if hasattr(e, "rfilename") else e.path.split("/")[0]
            for e in entries
            if ("/" in getattr(e, "rfilename", "")) or hasattr(e, "path")
        ))
    except Exception as e:
        logger.warning(f"Could not list projects: {e}")
        return []
packages.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ffmpeg
2
+ libgl1-mesa-glx
3
+ libglib2.0-0
4
+ libsm6
5
+ libxext6
6
+ libxrender-dev
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools>=69.0.0
2
+ gradio>=5.9.1
3
+ torch>=2.1.0
4
+ torchaudio>=2.1.0
5
+ torchvision>=0.16.0
6
+ transformers>=4.36.0,<5.0.0
7
+ diffusers>=0.25.0
8
+ accelerate>=0.25.0
9
+ safetensors>=0.4.0
10
+ peft>=0.7.0
11
+ huggingface_hub>=0.20.0
12
+ numpy>=1.24.0
13
+ Pillow>=10.0.0
14
+ soundfile>=0.12.0
15
+ pydub>=0.25.1
16
+ f5-tts>=0.3.0
17
+ sentencepiece>=0.1.99
18
+ protobuf>=3.20.0
19
+ openmim>=0.3.9