Daye-Lee18 commited on
Commit
a30a67a
·
1 Parent(s): f59528b

app.py modified version

Browse files
Files changed (3) hide show
  1. app.py +240 -132
  2. old_app.py +154 -0
  3. requirements.txt +8 -0
app.py CHANGED
@@ -1,154 +1,262 @@
 
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
- import random
4
-
5
- # import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
  import torch
 
 
 
 
8
 
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
 
 
 
11
 
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
16
 
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
 
20
- MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
 
 
 
 
 
 
 
35
  ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
 
 
 
38
 
39
- generator = torch.Generator().manual_seed(seed)
 
 
 
40
 
41
- image = pipe(
42
- prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
- height=height,
48
- generator=generator,
49
- ).images[0]
50
 
51
- return image, seed
 
 
 
 
52
 
 
 
 
 
53
 
54
- examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
- ]
59
 
60
- css = """
61
- #col-container {
62
- margin: 0 auto;
63
- max-width: 640px;
64
- }
65
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- with gr.Blocks(css=css) as demo:
68
- with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
 
71
- with gr.Row():
72
- prompt = gr.Text(
73
- label="Prompt",
74
- show_label=False,
75
- max_lines=1,
76
- placeholder="Enter your prompt",
77
- container=False,
78
- )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
-
82
- result = gr.Image(label="Result", show_label=False)
83
-
84
- with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
90
- )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
- with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
105
- minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
- step=32,
108
- value=1024, # Replace with defaults that work for your model
109
- )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
- minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
- step=32,
116
- value=1024, # Replace with defaults that work for your model
117
- )
118
-
119
- with gr.Row():
120
- guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
- step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
- )
127
-
128
- num_inference_steps = gr.Slider(
129
- label="Number of inference steps",
130
- minimum=1,
131
- maximum=50,
132
- step=1,
133
- value=2, # Replace with defaults that work for your model
134
- )
135
-
136
- gr.Examples(examples=examples, inputs=[prompt])
137
- gr.on(
138
- triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
- ],
150
- outputs=[result, seed],
151
  )
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  if __name__ == "__main__":
 
 
154
  demo.launch()
 
1
+ import os
2
+ import tempfile
3
+ from pathlib import Path
4
+ from typing import Tuple, Optional
5
+
6
  import gradio as gr
7
  import numpy as np
 
 
 
 
8
  import torch
9
+ import soundfile as sf
10
+ import librosa
11
+
12
+ from huggingface_hub import hf_hub_download
13
 
14
+ # -----------------------------
15
+ # Config
16
+ # -----------------------------
17
+ DEFAULT_WEIGHTS_REPO = os.environ.get("WEIGHTS_REPO", "isYes/HuMoGen-X-weights") # private model repo
18
+ WEIGHTS_FILENAME = os.environ.get("WEIGHTS_FILENAME", "train-0090.pt") # in the private repo
19
 
20
+ # Space는 CPU일 수도 있고 GPU일 수도 있음
21
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 
22
 
 
 
23
 
24
# -----------------------------
# Secure download + load
# -----------------------------
# Module-level memo so the checkpoint is downloaded and loaded only once per
# Space runtime. NOTE: Gradio has no `cache_resource` decorator — that API is
# Streamlit's `st.cache_resource`. The original `@gr.cache_resource` raises
# AttributeError at import time, so we cache explicitly instead.
_MODEL_CACHE = None


def load_model():
    """Load model weights from a PRIVATE HF repo using HF_TOKEN (Space Secret).

    The loaded model is memoized in ``_MODEL_CACHE`` so repeated calls after
    the first successful load are free.

    Returns:
        The deserialized checkpoint object (moved to ``DEVICE`` and switched
        to eval mode when it exposes ``to`` / ``eval``).

    Raises:
        RuntimeError: if the ``HF_TOKEN`` secret is not configured.
    """
    global _MODEL_CACHE
    if _MODEL_CACHE is not None:
        return _MODEL_CACHE

    token = os.environ.get("HF_TOKEN")
    if not token:
        raise RuntimeError(
            "HF_TOKEN secret is missing. Set it in Space Settings -> Secrets."
        )

    ckpt_path = hf_hub_download(
        repo_id=DEFAULT_WEIGHTS_REPO,
        filename=WEIGHTS_FILENAME,
        token=token,
    )

    # TODO: replace this with your actual model class init + load_state_dict
    # Example patterns:
    #   model = HuMoGenX(...)
    #   state = torch.load(ckpt_path, map_location="cpu")
    #   model.load_state_dict(state["state_dict"] if "state_dict" in state else state)
    #   model.to(DEVICE).eval()
    #
    # NOTE(review): `torch.load` unpickles arbitrary objects; this is only
    # acceptable because the checkpoint comes from our own private repo.
    # Prefer torch.load(..., weights_only=True) once a plain state_dict is used.
    model = torch.load(ckpt_path, map_location="cpu")
    if hasattr(model, "to"):
        model = model.to(DEVICE)
    if hasattr(model, "eval"):
        model.eval()

    _MODEL_CACHE = model
    return _MODEL_CACHE
 
60
+
61
# -----------------------------
# Utilities
# -----------------------------
def load_audio_mono_16k(audio_path: str, target_sr: int = 16000) -> Tuple[np.ndarray, int]:
    """Decode *audio_path* into a mono float32 waveform resampled to *target_sr*.

    Returns:
        (waveform, sample_rate) where sample_rate == target_sr.
    """
    # librosa handles resampling and down-mixing in a single call.
    waveform, _ = librosa.load(audio_path, sr=target_sr, mono=True)
    return waveform.astype(np.float32), target_sr
71
+
72
+
73
+ def render_motion_to_mp4(
74
+ motion: np.ndarray,
75
+ out_mp4_path: str,
76
+ fps: int = 30,
77
+ resolution: int = 512,
78
  ):
79
+ """
80
+ TODO: Replace this with your real renderer.
81
+ This function should create an mp4 from the generated motion.
82
+ - motion: (T, D) or (T, J, 3) etc.
83
+ - out_mp4_path: path to save mp4
84
 
85
+ Options:
86
+ 1) lightweight: matplotlib stick figure -> imageio mp4
87
+ 2) medium: pyrender / trimesh
88
+ 3) heavy: Blender (보통 Space에선 비추)
89
 
90
+ For now, we'll create a dummy black video so the UI pipeline is complete.
91
+ """
92
+ import imageio.v2 as imageio
 
 
 
 
 
 
93
 
94
+ T = int(motion.shape[0]) if motion is not None else 60
95
+ frames = []
96
+ for _ in range(T):
97
+ frame = np.zeros((resolution, resolution, 3), dtype=np.uint8)
98
+ frames.append(frame)
99
 
100
+ writer = imageio.get_writer(out_mp4_path, fps=fps)
101
+ for f in frames:
102
+ writer.append_data(f)
103
+ writer.close()
104
 
 
 
 
 
 
105
 
106
# -----------------------------
# Inference stub (connect your code here)
# -----------------------------
@torch.inference_mode()
def run_inference(
    audio_path: str,
    genre: str,
    cfg_genre: float,
    cfg_music: float,
    seed: int,
    num_frames: int,
    fps: int,
) -> np.ndarray:
    """Return generated motion as a numpy array of shape (num_frames, D).

    Replace the placeholder body with the HuMoGen-X sampling logic.

    Args:
        audio_path: path to the uploaded music file.
        genre: dance genre label chosen in the UI.
        cfg_genre / cfg_music: classifier-free guidance weights.
        seed: RNG seed; identical inputs + seed must reproduce the output.
        num_frames: number of motion frames to generate.
        fps: target frame rate (forwarded to the sampler when wired up).
    """
    # Load model (memoized — cheap after the first call)
    model = load_model()

    # Prepare audio
    audio, sr = load_audio_mono_16k(audio_path, target_sr=16000)

    # Torch generator for the real sampler below.
    g = torch.Generator(device=DEVICE)
    g.manual_seed(int(seed))

    # -----------------------
    # TODO: your actual inference
    # Example pseudo:
    #   cond = {
    #       "music": torch.tensor(audio)[None, ...].to(DEVICE),
    #       "genre": genre_to_id(genre),
    #   }
    #   motion = model.sample(
    #       cond=cond,
    #       guidance={"genre": cfg_genre, "music": cfg_music},
    #       num_frames=num_frames,
    #       generator=g,
    #   )
    #   motion_np = motion.detach().cpu().numpy()[0]
    # -----------------------

    # Placeholder motion (T, D). BUGFIX: the original drew from the *global*
    # unseeded numpy RNG, so the UI "Seed" control had no effect. Use a local
    # generator seeded with `seed` so the placeholder is reproducible too.
    rng = np.random.default_rng(int(seed))
    T = int(num_frames)
    D = 151  # adjust to your representation
    motion_np = rng.standard_normal((T, D)).astype(np.float32)
    return motion_np
154
+
155
+
156
def generate_demo(
    audio_file,
    genre: str,
    cfg_genre: float,
    cfg_music: float,
    seed: int,
    seconds: float,
    fps: int,
    resolution: int,
):
    """Gradio handler: validate inputs, run inference, render, return mp4 path."""
    if audio_file is None:
        raise gr.Error("음악 파일을 업로드해줘!")

    # Gradio may hand over either a plain path string or a file-like object.
    src_path = audio_file if isinstance(audio_file, str) else audio_file.name

    # Clip length in frames; never fewer than one frame.
    frame_count = int(max(1, round(seconds * fps)))

    motion = run_inference(
        audio_path=src_path,
        genre=genre,
        cfg_genre=float(cfg_genre),
        cfg_music=float(cfg_music),
        seed=int(seed),
        num_frames=frame_count,
        fps=int(fps),
    )

    # Render into a fresh temp directory so concurrent requests never collide.
    out_mp4 = str(Path(tempfile.mkdtemp()) / "humogenx_result.mp4")
    render_motion_to_mp4(
        motion=motion,
        out_mp4_path=out_mp4,
        fps=int(fps),
        resolution=int(resolution),
    )
    return out_mp4
199
+
200
+
201
# -----------------------------
# Gradio UI
# -----------------------------
def build_ui():
    """Assemble the Gradio Blocks interface and return it (caller launches it)."""
    genres = [
        "HipHop", "Breaking", "Popping", "Locking",
        "House", "Waacking", "Shuffle", "Disco",
        "Jazz", "Kpop", "Ballet", "Contemporary",
    ]  # swap in the thesis genre set if desired

    with gr.Blocks(title="HuMoGen-X Demo", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # HuMoGen-X Demo (Inference-only)
            - **Upload music** → choose **dance genre** → adjust **CFG** → get **MP4**.
            - Model weights are stored in a **private repo** and loaded at runtime.
            """.strip()
        )

        with gr.Row():
            # Left column: all inputs and the trigger button.
            with gr.Column(scale=1):
                audio_in = gr.Audio(label="Music Upload", type="filepath")
                genre_dd = gr.Dropdown(choices=genres, value=genres[0], label="Dance Genre")

                gr.Markdown("### CFG (Classifier-Free Guidance)")
                genre_cfg = gr.Slider(0.0, 8.0, value=3.0, step=0.1, label="CFG: Genre")
                music_cfg = gr.Slider(0.0, 8.0, value=3.0, step=0.1, label="CFG: Music")

                with gr.Row():
                    seed_in = gr.Number(value=0, precision=0, label="Seed (int)")
                    length_s = gr.Slider(1.0, 12.0, value=6.0, step=0.5, label="Length (sec)")

                with gr.Row():
                    fps_dd = gr.Dropdown(choices=[20, 24, 30, 60], value=30, label="FPS")
                    res_dd = gr.Dropdown(choices=[256, 512, 720], value=512, label="Render Resolution")

                generate_btn = gr.Button("Generate", variant="primary")

            # Right column: rendered result.
            with gr.Column(scale=1):
                result_video = gr.Video(label="Result (MP4)", autoplay=True)

        generate_btn.click(
            fn=generate_demo,
            inputs=[audio_in, genre_dd, genre_cfg, music_cfg, seed_in, length_s, fps_dd, res_dd],
            outputs=[result_video],
        )

        gr.Markdown(
            """
            ### Notes
            - This Space is **inference-only**; weights are not downloadable here.
            - If you want higher quality rendering, replace `render_motion_to_mp4()` with your renderer.
            """.strip()
        )

    return demo
257
+
258
+
259
if __name__ == "__main__":
    # Queue serializes requests — important on shared Space hardware.
    app = build_ui()
    app.queue()
    app.launch()
old_app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+
5
+ # import spaces #[uncomment to use ZeroGPU]
6
+ from diffusers import DiffusionPipeline
7
+ import torch
8
+
9
+ device = "cuda" if torch.cuda.is_available() else "cpu"
10
+ model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
+
12
+ if torch.cuda.is_available():
13
+ torch_dtype = torch.float16
14
+ else:
15
+ torch_dtype = torch.float32
16
+
17
+ pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
+ pipe = pipe.to(device)
19
+
20
+ MAX_SEED = np.iinfo(np.int32).max
21
+ MAX_IMAGE_SIZE = 1024
22
+
23
+
24
+ # @spaces.GPU #[uncomment to use ZeroGPU]
25
+ def infer(
26
+ prompt,
27
+ negative_prompt,
28
+ seed,
29
+ randomize_seed,
30
+ width,
31
+ height,
32
+ guidance_scale,
33
+ num_inference_steps,
34
+ progress=gr.Progress(track_tqdm=True),
35
+ ):
36
+ if randomize_seed:
37
+ seed = random.randint(0, MAX_SEED)
38
+
39
+ generator = torch.Generator().manual_seed(seed)
40
+
41
+ image = pipe(
42
+ prompt=prompt,
43
+ negative_prompt=negative_prompt,
44
+ guidance_scale=guidance_scale,
45
+ num_inference_steps=num_inference_steps,
46
+ width=width,
47
+ height=height,
48
+ generator=generator,
49
+ ).images[0]
50
+
51
+ return image, seed
52
+
53
+
54
+ examples = [
55
+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
+ "An astronaut riding a green horse",
57
+ "A delicious ceviche cheesecake slice",
58
+ ]
59
+
60
+ css = """
61
+ #col-container {
62
+ margin: 0 auto;
63
+ max-width: 640px;
64
+ }
65
+ """
66
+
67
+ with gr.Blocks(css=css) as demo:
68
+ with gr.Column(elem_id="col-container"):
69
+ gr.Markdown(" # Text-to-Image Gradio Template")
70
+
71
+ with gr.Row():
72
+ prompt = gr.Text(
73
+ label="Prompt",
74
+ show_label=False,
75
+ max_lines=1,
76
+ placeholder="Enter your prompt",
77
+ container=False,
78
+ )
79
+
80
+ run_button = gr.Button("Run", scale=0, variant="primary")
81
+
82
+ result = gr.Image(label="Result", show_label=False)
83
+
84
+ with gr.Accordion("Advanced Settings", open=False):
85
+ negative_prompt = gr.Text(
86
+ label="Negative prompt",
87
+ max_lines=1,
88
+ placeholder="Enter a negative prompt",
89
+ visible=False,
90
+ )
91
+
92
+ seed = gr.Slider(
93
+ label="Seed",
94
+ minimum=0,
95
+ maximum=MAX_SEED,
96
+ step=1,
97
+ value=0,
98
+ )
99
+
100
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
+
102
+ with gr.Row():
103
+ width = gr.Slider(
104
+ label="Width",
105
+ minimum=256,
106
+ maximum=MAX_IMAGE_SIZE,
107
+ step=32,
108
+ value=1024, # Replace with defaults that work for your model
109
+ )
110
+
111
+ height = gr.Slider(
112
+ label="Height",
113
+ minimum=256,
114
+ maximum=MAX_IMAGE_SIZE,
115
+ step=32,
116
+ value=1024, # Replace with defaults that work for your model
117
+ )
118
+
119
+ with gr.Row():
120
+ guidance_scale = gr.Slider(
121
+ label="Guidance scale",
122
+ minimum=0.0,
123
+ maximum=10.0,
124
+ step=0.1,
125
+ value=0.0, # Replace with defaults that work for your model
126
+ )
127
+
128
+ num_inference_steps = gr.Slider(
129
+ label="Number of inference steps",
130
+ minimum=1,
131
+ maximum=50,
132
+ step=1,
133
+ value=2, # Replace with defaults that work for your model
134
+ )
135
+
136
+ gr.Examples(examples=examples, inputs=[prompt])
137
+ gr.on(
138
+ triggers=[run_button.click, prompt.submit],
139
+ fn=infer,
140
+ inputs=[
141
+ prompt,
142
+ negative_prompt,
143
+ seed,
144
+ randomize_seed,
145
+ width,
146
+ height,
147
+ guidance_scale,
148
+ num_inference_steps,
149
+ ],
150
+ outputs=[result, seed],
151
+ )
152
+
153
+ if __name__ == "__main__":
154
+ demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  accelerate
2
  diffusers
3
  invisible_watermark
 
1
+ gradio>=4.0.0
2
+ huggingface_hub>=0.20.0
3
+ torch
4
+ numpy
5
+ soundfile
6
+ librosa
7
+ imageio
8
+ imageio-ffmpeg
9
  accelerate
10
  diffusers
11
  invisible_watermark