Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| from pathlib import Path | |
| from typing import Tuple, Optional | |
| from functools import lru_cache | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| import soundfile as sf | |
| import librosa | |
| from huggingface_hub import hf_hub_download | |
# -----------------------------
# Config
# -----------------------------
# Private model repo that holds the checkpoint; override with the WEIGHTS_REPO env var.
DEFAULT_WEIGHTS_REPO = os.getenv("WEIGHTS_REPO", "isYes/HuMoGen-X-weights")
# Checkpoint file name inside that private repo; override with WEIGHTS_FILENAME.
WEIGHTS_FILENAME = os.getenv("WEIGHTS_FILENAME", "train-0090.pt")
# The Space may be running on either CPU or GPU, so pick the device at import time.
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"
| # ----------------------------- | |
| # Secure download + load | |
| # ----------------------------- | |
@lru_cache(maxsize=1)
def load_model():
    """
    Load the model checkpoint from the private HF repo using HF_TOKEN (Space Secret).

    The result is memoized with ``lru_cache`` so the checkpoint is downloaded and
    deserialized only once per process; later calls return the same object.
    A failed call (e.g. missing token) is not cached, so it is retried next time.

    Returns:
        Whatever object ``torch.load`` yields for the checkpoint, moved to
        ``DEVICE`` and switched to eval mode when it supports ``.to`` / ``.eval``.

    Raises:
        RuntimeError: if the ``HF_TOKEN`` environment variable is missing or empty.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise RuntimeError(
            "HF_TOKEN secret is missing. Set it in Space Settings -> Secrets."
        )

    ckpt_path = hf_hub_download(
        repo_id=DEFAULT_WEIGHTS_REPO,
        filename=WEIGHTS_FILENAME,
        token=token,
    )

    # TODO: replace this with your actual model class init + load_state_dict
    # Example patterns:
    #   model = HuMoGenX(...)
    #   state = torch.load(ckpt_path, map_location="cpu")
    #   model.load_state_dict(state["state_dict"] if "state_dict" in state else state)
    #   model.to(DEVICE).eval()
    #
    # Here we keep a placeholder "model" object.
    # NOTE(security): torch.load unpickles the file, which can execute arbitrary
    # code; only point WEIGHTS_REPO at a repository you fully trust.
    model = torch.load(ckpt_path, map_location="cpu")

    # The checkpoint may be a plain dict (state dict) rather than a module, so
    # only call .to / .eval when the loaded object actually provides them.
    if hasattr(model, "to"):
        model = model.to(DEVICE)
    if hasattr(model, "eval"):
        model.eval()
    return model
| # ----------------------------- | |
| # Utilities | |
| # ----------------------------- | |
def load_audio_mono_16k(audio_path: str, target_sr: int = 16000) -> Tuple[np.ndarray, int]:
    """
    Read an audio file as a mono float32 waveform resampled to ``target_sr``.

    Returns a ``(waveform, target_sr)`` pair.
    """
    # librosa handles both the resampling and the mono down-mix in one call;
    # the sample rate it reports is not needed since target_sr is returned.
    waveform, _ = librosa.load(audio_path, sr=target_sr, mono=True)
    return waveform.astype(np.float32), target_sr
def render_motion_to_mp4(
    motion: np.ndarray,
    out_mp4_path: str,
    fps: int = 30,
    resolution: int = 512,
):
    """
    TODO: Replace this with your real renderer.

    This function should create an mp4 from the generated motion.
    - motion: (T, D) or (T, J, 3) etc.; only its first dimension (frame count)
      is used for now. ``None`` falls back to 60 frames.
    - out_mp4_path: path to save mp4
    - fps: frame rate passed to the video writer
    - resolution: side length in pixels of the square output frames

    Options:
    1) lightweight: matplotlib stick figure -> imageio mp4
    2) medium: pyrender / trimesh
    3) heavy: Blender (usually not recommended on a Space)

    For now, we create a dummy black video so the UI pipeline is complete.
    """
    import imageio.v2 as imageio

    num_frames = int(motion.shape[0]) if motion is not None else 60

    # Every placeholder frame is the same black image, so allocate it once
    # instead of buffering num_frames copies in memory before writing.
    blank_frame = np.zeros((resolution, resolution, 3), dtype=np.uint8)

    # The context manager closes the writer even if append_data raises, so a
    # failed render does not leave the encoder or output file handle open.
    with imageio.get_writer(out_mp4_path, fps=fps) as writer:
        for _ in range(num_frames):
            writer.append_data(blank_frame)
| # ----------------------------- | |
| # Inference stub (connect your code here) | |
| # ----------------------------- | |
def run_inference(
    audio_path: str,
    genre: str,
    cfg_genre: float,
    cfg_music: float,
    seed: int,
    num_frames: int,
    fps: int,
) -> np.ndarray:
    """
    Returns generated motion as a float32 numpy array of shape (num_frames, D).

    Replace the body with your HuMoGen-X sampling logic. Until then the
    function loads the model and audio (so configuration problems surface
    early) and returns placeholder noise drawn from the seeded generator, so
    the same seed yields the same output. ``genre``, ``cfg_genre``,
    ``cfg_music`` and ``fps`` are accepted but not used by the placeholder.
    """
    # Load model
    model = load_model()

    # Prepare audio
    audio, sr = load_audio_mono_16k(audio_path, target_sr=16000)

    # Set seed
    g = torch.Generator(device=DEVICE)
    g.manual_seed(int(seed))

    # -----------------------
    # TODO: your actual inference
    # Example pseudo:
    # cond = {
    #     "music": torch.tensor(audio)[None, ...].to(DEVICE),
    #     "genre": genre_to_id(genre),
    # }
    # motion = model.sample(
    #     cond=cond,
    #     guidance={"genre": cfg_genre, "music": cfg_music},
    #     num_frames=num_frames,
    #     generator=g,
    # )
    # motion_np = motion.detach().cpu().numpy()[0]
    # -----------------------

    # Placeholder motion (T, D), sampled through the seeded generator so the
    # "Seed" control in the UI actually determines the result.
    frame_count = int(num_frames)
    feature_dim = 151  # adjust to your representation
    placeholder = torch.randn(
        frame_count,
        feature_dim,
        generator=g,
        device=DEVICE,
        dtype=torch.float32,
    )
    motion_np = placeholder.cpu().numpy()
    return motion_np
def generate_demo(
    audio_file,
    genre: str,
    cfg_genre: float,
    cfg_music: float,
    seed: int,
    seconds: float,
    fps: int,
    resolution: int,
):
    """
    Gradio handler: takes UI inputs, runs inference, renders mp4, returns mp4 path.

    Raises:
        gr.Error: if no audio file was provided.
    """
    if audio_file is None:
        # User-facing message ("Please upload a music file!"), restored from
        # its mis-encoded form so it renders as readable Korean in the UI.
        raise gr.Error("음악 파일을 업로드해줘!")

    # audio_file can be a path string or a file-like object exposing .name
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

    # Always render at least one frame, even for very short durations.
    num_frames = int(max(1, round(seconds * fps)))

    motion = run_inference(
        audio_path=audio_path,
        genre=genre,
        cfg_genre=float(cfg_genre),
        cfg_music=float(cfg_music),
        seed=int(seed),
        num_frames=num_frames,
        fps=int(fps),
    )

    # Save output mp4 to a fresh temp directory. It is not removed here because
    # the returned path is handed to Gradio as this handler's output.
    tmp_dir = Path(tempfile.mkdtemp())
    out_mp4 = str(tmp_dir / "humogenx_result.mp4")
    render_motion_to_mp4(
        motion=motion,
        out_mp4_path=out_mp4,
        fps=int(fps),
        resolution=int(resolution),
    )
    return out_mp4
| # ----------------------------- | |
| # Gradio UI | |
| # ----------------------------- | |
def build_ui():
    """
    Build the Gradio Blocks app for the demo and return it (not yet launched).

    Layout: a left column with the audio upload, genre dropdown, CFG sliders,
    seed/length and fps/resolution controls plus the Generate button, and a
    right column with the resulting video. The button is wired to
    ``generate_demo``.
    """
    # You can swap this list for your thesis genre set.
    genre_choices = [
        "HipHop", "Breaking", "Popping", "Locking",
        "House", "Waacking", "Shuffle", "Disco",
        "Jazz", "Kpop", "Ballet", "Contemporary",
    ]

    with gr.Blocks(title="HuMoGen-X Demo", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # HuMoGen-X Demo (Inference-only)
            - **Upload music** → choose **dance genre** → adjust **CFG** → get **MP4**.
            - Model weights are stored in a **private repo** and loaded at runtime.
            """.strip()
        )

        with gr.Row():
            # Left column: all inputs.
            with gr.Column(scale=1):
                audio = gr.Audio(label="Music Upload", type="filepath")
                genre = gr.Dropdown(
                    choices=genre_choices,
                    value=genre_choices[0],
                    label="Dance Genre",
                )
                gr.Markdown("### CFG (Classifier-Free Guidance)")
                cfg_genre = gr.Slider(0.0, 8.0, value=3.0, step=0.1, label="CFG: Genre")
                cfg_music = gr.Slider(0.0, 8.0, value=3.0, step=0.1, label="CFG: Music")
                with gr.Row():
                    seed = gr.Number(value=0, precision=0, label="Seed (int)")
                    seconds = gr.Slider(1.0, 12.0, value=6.0, step=0.5, label="Length (sec)")
                with gr.Row():
                    fps = gr.Dropdown(choices=[20, 24, 30, 60], value=30, label="FPS")
                    resolution = gr.Dropdown(
                        choices=[256, 512, 720],
                        value=512,
                        label="Render Resolution",
                    )
                run_btn = gr.Button("Generate", variant="primary")

            # Right column: rendered result.
            with gr.Column(scale=1):
                out_video = gr.Video(label="Result (MP4)", autoplay=True)

        # Input order must match the parameter order of generate_demo.
        run_btn.click(
            fn=generate_demo,
            inputs=[audio, genre, cfg_genre, cfg_music, seed, seconds, fps, resolution],
            outputs=[out_video],
        )

        gr.Markdown(
            """
            ### Notes
            - This Space is **inference-only**; weights are not downloadable here.
            - If you want higher quality rendering, replace `render_motion_to_mp4()` with your renderer.
            """.strip()
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, enable request queuing, then serve it.
    demo = build_ui()
    demo.queue().launch()