| |
| |
| |
| |
| |
| |
|
|
| import os |
|
|
| |
| |
| _APP_DIR = os.path.dirname(os.path.abspath(__file__)) |
| _WHEELS_DIR = os.path.join(_APP_DIR, "wheels") |
| _IS_SPACES = os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID") |
|
|
| if _IS_SPACES: |
| os.system("rm -rf /data-nvme/zerogpu-offload/ 2>/dev/null || true") |
| os.system("pip install numpy==1.23.0") |
| |
| if os.path.isdir(_WHEELS_DIR): |
| for _f in sorted(os.listdir(_WHEELS_DIR)): |
| if _f.endswith(".whl"): |
| _whl = os.path.join(_WHEELS_DIR, _f) |
| os.system(f'pip install "{_whl}" --force-reinstall') |
| os.system("pip install chumpy==0.70 --no-build-isolation") |
| os.system( |
| "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html" |
| ) |
| |
|
|
| import argparse |
| import glob |
| import shutil |
| import sys |
| from typing import Any, Dict, List, Optional, Tuple, Union |
|
|
| |
| _SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) |
| if _SCRIPT_DIR not in sys.path: |
| sys.path.insert(0, _SCRIPT_DIR) |
| os.chdir(_SCRIPT_DIR) |
|
|
| import ffmpegio |
| import gradio as gr |
| import imageio.v3 as iio |
| import numpy as np |
| import torch |
| from accelerate import Accelerator |
| from omegaconf import DictConfig |
| from PIL import Image |
|
|
| |
| try: |
| import spaces |
| GPU_DECORATOR = lambda fn: spaces.GPU(duration=80)(fn) |
| except ImportError: |
| GPU_DECORATOR = lambda fn: fn |
|
|
| torch._dynamo.config.disable = True |
|
|
| from core.datasets.data_utils import SrcImagePipeline |
| from core.utils.app_utils import ( |
| get_motion_information, |
| prepare_examples_ordered, |
| prepare_input_and_output, |
| split_image, |
| ) |
| from core.utils.model_card import MODEL_CONFIG |
| from core.utils.model_download_utils import AutoModelQuery |
| from engine.pose_estimation.pose_estimator import PoseEstimator |
| from scripts.download_motion_video import motion_video_check |
| from scripts.inference.app_inference import ( |
| build_app_model, |
| inference_results, |
| parse_app_configs, |
| ) |
| from scripts.inference.utils import ( |
| assert_input_image, |
| easy_memory_manager, |
| get_image_base64, |
| prepare_working_dir, |
| ) |
|
|
| |
| MOTION_SIZE: int = 120 |
| MV_IMAGES_THRESHOLD: int = 8 |
| NUM_VIEWS: int = 2 |
| DEFAULT_FPS: int = 30 |
| DEFAULT_VIDEO_CODEC: str = "libx264" |
| DEFAULT_PIXEL_FORMAT: str = "yuv420p" |
| DEFAULT_VIDEO_BITRATE: str = "10M" |
| MACRO_BLOCK_SIZE: int = 16 |
|
|
|
|
| def get_motion_video_fps( |
| video_params: Union[str, Dict[str, Any]], default: int = DEFAULT_FPS |
| ) -> int: |
| """Get FPS from samurai_visualize.mp4 in the motion video directory.""" |
| try: |
| path = video_params |
| if isinstance(video_params, dict): |
| path = video_params.get("path") or video_params.get("name") or "" |
| if not path or not isinstance(path, str): |
| return default |
| vst = ffmpegio.probe.video_streams_basic(path) |
| if not vst or "frame_rate" not in vst[0]: |
| return default |
| fr = vst[0]["frame_rate"] |
| if hasattr(fr, "numerator") and hasattr(fr, "denominator"): |
| fps_val = fr.numerator / fr.denominator if fr.denominator else default |
| else: |
| fps_val = float(fr) if fr else default |
| return max(1, round(fps_val)) |
| except Exception: |
| return default |
|
|
|
|
| def demo_lhmpp( |
| lhmpp: Optional[torch.nn.Module], |
| pose_estimator: Optional[PoseEstimator], |
| dataset_pipeline: SrcImagePipeline, |
| cfg: DictConfig, |
| ) -> None: |
| """Initialize and launch the Gradio demo interface.""" |
|
|
| @GPU_DECORATOR |
| def core_fn( |
| image: Any, |
| video: Any, |
| ref_view: int, |
| video_params: Union[str, Dict[str, Any]], |
| visualized_center: bool, |
| working_dir: Any, |
| motion_size: int, |
| render_fps: int, |
| ) -> Tuple[Any, Optional[str], None, List[Image.Image]]: |
| """Core inference function for the Gradio interface.""" |
| import gradio as gr |
|
|
| device = "cuda" |
| dtype = torch.float32 |
|
|
| try: |
| ( |
| imgs, |
| input_imgs_path, |
| motion_path, |
| dump_image_dir, |
| dump_video_path, |
| ) = prepare_input_and_output( |
| image, video, ref_view, video_params, working_dir, dataset_pipeline, cfg |
| ) |
| except ValueError as e: |
| raise gr.Error(str(e)) from e |
|
|
| motion_name, motion_seqs = get_motion_information(motion_path, cfg, motion_size) |
| video_size = len(motion_seqs["motion_seqs"]) |
|
|
| with torch.no_grad(): |
| with easy_memory_manager(pose_estimator, device="cuda"): |
| shape_pose = pose_estimator(imgs[0]) |
| assert shape_pose.is_full_body, f"The input image is illegal, {shape_pose.msg}" |
|
|
| img_np = np.stack(imgs) / 255.0 |
| ref_imgs_tensor = ( |
| torch.from_numpy(img_np).permute(0, 3, 1, 2).float().to(device) |
| ) |
| smplx_params = motion_seqs["smplx_params"] |
| smplx_params["betas"] = torch.tensor( |
| shape_pose.beta, dtype=dtype, device=device |
| ).unsqueeze(0) |
|
|
| rgbs = inference_results( |
| lhmpp, |
| ref_imgs_tensor, |
| smplx_params, |
| motion_seqs, |
| video_size=video_size, |
| visualized_center=visualized_center, |
| device=device, |
| ) |
|
|
| render_fps_actual = get_motion_video_fps(video_params, default=render_fps) |
|
|
| iio.imwrite( |
| dump_video_path, |
| rgbs, |
| fps=render_fps_actual, |
| codec=DEFAULT_VIDEO_CODEC, |
| pixelformat=DEFAULT_PIXEL_FORMAT, |
| bitrate=DEFAULT_VIDEO_BITRATE, |
| macro_block_size=MACRO_BLOCK_SIZE, |
| ) |
| print(f"Saved video to {dump_video_path}") |
| selected_pils = [ |
| Image.fromarray(np.clip(img, 0, 255).astype(np.uint8)) for img in imgs |
| ] |
| return image, dump_video_path, None, selected_pils |
|
|
| gallery_css = """ |
| .gradio-container { max-width: 95vw !important; } |
| .image-prompt-gallery { min-height: 120px !important; } |
| .image-prompt-gallery img { object-fit: contain !important; } |
| #selected_input_gallery { min-height: 240px !important; overflow: visible !important; } |
| #selected_input_gallery .thumbnail-item img, |
| #selected_input_gallery img { object-fit: contain !important; } |
| #image_examples, #image_examples .wrap, #image_examples .contain, |
| #image_examples .block, #image_examples > div { |
| max-width: none !important; width: 100% !important; box-sizing: border-box !important; |
| } |
| #image_examples { width: 100% !important; } |
| #image_examples .wrap, #image_examples .contain, |
| #image_examples > div > div, #image_examples [class*="example"] { |
| width: 100% !important; display: grid !important; |
| grid-template-columns: repeat(2, minmax(0, 1fr)) !important; gap: 12px !important; |
| } |
| #image_examples .example, #image_examples [class*="example"] > * { |
| width: 100% !important; min-width: 0 !important; min-height: 160px !important; |
| } |
| #image_examples img { |
| width: 100% !important; height: auto !important; |
| max-height: 200px !important; object-fit: contain !important; |
| } |
| .lhmpp-header { text-align: center !important; margin-bottom: 16px !important; } |
| .lhmpp-header .header-logo { height: 140px !important; display: block !important; margin: 0 auto 12px auto !important; } |
| .lhmpp-header .publication-title { margin-bottom: 0 !important; font-size: 2.2rem !important; } |
| .lhmpp-badges { display: flex !important; flex-wrap: wrap !important; justify-content: center !important; gap: 8px !important; } |
| .lhmpp-badges a img { height: 32px !important; } |
| .video-row { gap: 16px !important; } |
| .video-row .video-wrapper { min-width: 0 !important; flex: 1 !important; } |
| .video-row video, .video-row .gr-video { width: 100% !important; max-width: 100% !important; } |
| .section-label { font-size: 0.95em !important; margin-bottom: 6px !important; } |
| """ |
| with gr.Blocks( |
| title="LHM++", |
| analytics_enabled=False, |
| css=gallery_css, |
| theme=gr.themes.Soft(primary_hue="violet", secondary_hue="purple"), |
| ) as demo: |
| logo_url = "./assets/LHM++_logo.png" |
| logo_base64 = get_image_base64(logo_url) |
| gr.HTML( |
| f""" |
| <div class="lhmpp-header" style="margin-bottom: 12px;"> |
| <img src="{logo_base64}" class="header-logo" alt="LHM++ Logo"/> |
| <h2 class="publication-title"><b style="color: #7B2FBE">L</b><b style="color: #9B59B6">H</b><b style="color: #C084FC">M</b><b style="background: linear-gradient(135deg, #C084FC, #E879F9); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-size: 1.1em;">++</b>: <b style="color: #6D28D9">A</b>n <b style="color: #7C3AED">E</b>fficient <b style="color: #8B5CF6">L</b>arge <b style="color: #9B59B6">H</b>uman <b style="color: #A78BFA">R</b>econstruction <b style="color: #B794F4">M</b>odel for <b style="color: #C084FC">P</b>ose-free <b style="color: #D6BCFA">I</b>mages to <b style="color: #E879F9">3D</b></h2> |
| </div> |
| <div class="lhmpp-badges" style="margin-bottom: 16px;"> |
| <a title="Website" href="https://lingtengqiu.github.io/LHM++/" target="_blank" rel="noopener noreferrer"> |
| <img src="https://www.obukhov.ai/img/badges/badge-website.svg" alt="Website"> |
| </a> |
| <a title="Read Paper" href="https://arxiv.org/pdf/2506.13766v2" target="_blank" rel="noopener noreferrer"> |
| <img src="https://www.obukhov.ai/img/badges/badge-pdf.svg" alt="Paper"> |
| </a> |
| <a title="GitHub" href="https://github.com/aigc3d/LHM-plusplus" target="_blank" rel="noopener noreferrer"> |
| <img src="https://img.shields.io/github/stars/aigc3d/LHM-plusplus?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="GitHub"> |
| </a> |
| <a title="Video" href="https://www.youtube.com/watch?v=Nipf3jdSi34" target="_blank" rel="noopener noreferrer"> |
| <img src="https://img.shields.io/badge/YouTube-QiuLingteng-red?logo=youtube" alt="YouTube"> |
| </a> |
| </div> |
| """ |
| ) |
|
|
| gr.HTML( |
| """<p style="color: #c00; font-size: 0.9em; margin-bottom: 12px; font-weight: 500;"> |
| ⚠️ Human images only. Motion video max 1000 frames. LHMPP-700M requires ≥8 GB GPU. Videos may exhibit white occlusion artifacts due to cropping with the original mask, and some motion estimation may have slight jittering. |
| </p>""" |
| ) |
|
|
| with gr.Row(equal_height=False): |
| with gr.Column(variant="panel", scale=3, min_width=320): |
| with gr.Accordion(label="⚙️ Input Settings", open=False): |
| with gr.Row(): |
| num_images = gr.Slider( |
| label="Ref. Images", |
| minimum=1, |
| maximum=8, |
| value=8, |
| step=1, |
| elem_id="num_input_images", |
| interactive=True, |
| scale=1, |
| ) |
| motion_size = gr.Slider( |
| label="Motion Frames", |
| minimum=30, |
| maximum=1000, |
| value=MOTION_SIZE, |
| step=2, |
| elem_id="motion_size", |
| interactive=True, |
| scale=1, |
| ) |
| with gr.Row(): |
| render_fps = gr.Slider( |
| label="FPS", |
| minimum=10, |
| maximum=120, |
| value=DEFAULT_FPS, |
| step=1, |
| elem_id="render_fps", |
| interactive=True, |
| scale=1, |
| ) |
| center_visualizen = gr.Checkbox( |
| label="Center Crop", value=False, scale=1 |
| ) |
|
|
| with gr.Tabs(elem_id="core_input_image"): |
| with gr.Tab(label="Input Video or Images"): |
| image_prompt = gr.Image( |
| label="Image Prompt", |
| format="png", |
| visible=False, |
| image_mode="RGBA", |
| type="pil", |
| height=360, |
| ) |
| default_imgs = glob.glob( |
| "./assets/example_multi_images/00000_yuliang_*.png" |
| ) |
| default_imgs = sorted(default_imgs)[:MV_IMAGES_THRESHOLD] |
| default_imgs = ( |
| [Image.open(img) for img in default_imgs] |
| if default_imgs |
| else [] |
| ) |
| multiimage_prompt = gr.Gallery( |
| label="Image Prompt", |
| format="png", |
| type="pil", |
| height=280, |
| columns=8, |
| value=default_imgs, |
| object_fit="contain", |
| elem_classes="image-prompt-gallery", |
| ) |
| video_upload_input = gr.Video( |
| label="Or Input Video", |
| height=300, |
| interactive=True, |
| value=None, |
| elem_id="Or video input.", |
| ) |
| gr.Markdown( |
| "Upload multi-view human images or a video.", |
| elem_classes="small-hint", |
| ) |
|
|
| _all_examples = prepare_examples_ordered() |
| examples = gr.Examples( |
| examples=_all_examples, |
| inputs=[image_prompt], |
| fn=lambda img: split_image(img, NUM_VIEWS), |
| outputs=[multiimage_prompt], |
| run_on_click=True, |
| examples_per_page=8, |
| elem_id="image_examples", |
| ) |
| submit = gr.Button( |
| "Generate", elem_id="core_generate", variant="primary", size="lg" |
| ) |
|
|
| |
| motion_video_dir = "./motion_video" |
| os.makedirs(motion_video_dir, exist_ok=True) |
| examples_video = [ |
| d |
| for d in os.listdir(motion_video_dir) |
| if os.path.isdir(os.path.join(motion_video_dir, d)) |
| ] |
| examples = [ |
| os.path.join(motion_video_dir, ex, "samurai_visualize.mp4") |
| for ex in examples_video |
| ] |
| examples = list(filter(lambda x: os.path.exists(x), examples)) |
| examples = sorted(examples) |
| new_examples = [] |
| for ex in examples: |
| video_basename = os.path.basename(os.path.dirname(ex)) |
| input_video = os.path.join(os.path.dirname(ex), video_basename + ".mp4") |
| if not os.path.exists(input_video): |
| try: |
| shutil.copyfile(ex, input_video) |
| except Exception: |
| pass |
| new_examples.append(input_video) |
|
|
| with gr.Column(variant="panel", scale=5, min_width=400): |
| gr.Markdown( |
| "**Selected Input** (used for inference):", |
| elem_classes="section-label", |
| ) |
| selected_input_tag = gr.Gallery( |
| label="Selected Input Images (used for inference)", |
| format="png", |
| type="pil", |
| height=200, |
| columns=8, |
| value=[], |
| object_fit="contain", |
| show_label=False, |
| elem_id="selected_input_gallery", |
| ) |
| gr.Markdown("**Motion & Output**", elem_classes="section-label") |
| with gr.Row(variant="default", elem_classes="video-row"): |
| with gr.Column(scale=1): |
| video_input = gr.Video( |
| label="Motion Video", |
| height=320, |
| interactive=False, |
| value=(new_examples[0] if len(new_examples) >= 1 else None), |
| autoplay=True, |
| ) |
| with gr.Column(scale=2): |
| output_video = gr.Video( |
| label="Rendered Video", |
| format="mp4", |
| height=480, |
| autoplay=True, |
| ) |
| if new_examples: |
| gr.Examples( |
| examples=new_examples, |
| inputs=[video_input], |
| examples_per_page=8, |
| ) |
|
|
| working_dir = gr.State() |
| submit.click( |
| fn=assert_input_image, |
| inputs=[multiimage_prompt], |
| queue=False, |
| ).success( |
| fn=prepare_working_dir, |
| outputs=[working_dir], |
| queue=False, |
| ).success( |
| fn=core_fn, |
| inputs=[ |
| multiimage_prompt, |
| video_upload_input, |
| num_images, |
| video_input, |
| center_visualizen, |
| working_dir, |
| motion_size, |
| render_fps, |
| ], |
| outputs=[ |
| multiimage_prompt, |
| output_video, |
| video_upload_input, |
| selected_input_tag, |
| ], |
| ) |
|
|
| demo.queue() |
| demo.launch(server_name="0.0.0.0") |
|
|
|
|
| def prior_model_check(save_dir: str = "./pretrained_models") -> None: |
| """Check if prior models exist with valid targets; if not, download and link.""" |
| human_model_path = os.path.join(save_dir, "human_model_files") |
| if os.path.exists(human_model_path): |
| return |
| if os.path.islink(human_model_path): |
| try: |
| os.unlink(human_model_path) |
| print("Removed broken symlink: human_model_files") |
| except OSError as e: |
| print(f"Failed to remove broken symlink: {e}") |
| print("Prior models not found or invalid. Downloading...") |
| auto_query = AutoModelQuery(save_dir=save_dir) |
| auto_query.download_all_prior_models() |
| print("Prior models ready.") |
|
|
|
|
| def get_parse() -> argparse.Namespace: |
| """Parse command line arguments.""" |
| parser = argparse.ArgumentParser( |
| description="LHM-gradio: Large Animatable Human Model" |
| ) |
| parser.add_argument( |
| "--model_name", |
| default="LHMPP-700M", |
| type=str, |
| choices=["LHMPP-700M", "LHMPPS-700M"], |
| help="Model name", |
| ) |
| parser.add_argument("--debug", action="store_true", help="Debug mode") |
| return parser.parse_args() |
|
|
|
|
| def launch_gradio_app() -> None: |
| """Launch the Gradio application.""" |
| accelerator = Accelerator() |
| args = get_parse() |
| model_name = args.model_name |
|
|
| prior_model_check(save_dir="./pretrained_models") |
| motion_video_check(save_dir=".") |
|
|
| auto_query = AutoModelQuery(save_dir="./pretrained_models") |
| model_path: str = auto_query.query(model_name) |
| model_config: str = MODEL_CONFIG[model_name] |
| model_cards: Dict[str, Dict[str, Any]] = { |
| model_name: { |
| "model_path": model_path, |
| "model_config": model_config, |
| } |
| } |
|
|
| os.environ.update( |
| { |
| "APP_ENABLED": "1", |
| "APP_MODEL_NAME": model_name, |
| "APP_TYPE": "infer.human_lrm_a4o", |
| "NUMBA_THREADING_LAYER": "omp", |
| } |
| ) |
|
|
| processing_list: List[Dict[str, Any]] = [ |
| dict( |
| name="PadRatioWithScale", |
| target_ratio=5 / 3, |
| tgt_max_size_list=[840], |
| val=True, |
| ), |
| ] |
|
|
| cfg, _ = parse_app_configs(model_cards) |
|
|
| dataset_pipeline = SrcImagePipeline(*processing_list) |
|
|
| lhmpp: Optional[torch.nn.Module] = None |
| pose_estimator: Optional[PoseEstimator] = None |
| if args.debug: |
| lhmpp = None |
| pose_estimator = None |
| else: |
| lhmpp = build_app_model(cfg) |
| lhmpp.to("cuda") |
| pose_estimator = PoseEstimator( |
| "./pretrained_models/human_model_files/", device="cpu" |
| ) |
| pose_estimator.device = "cuda" |
|
|
| demo_lhmpp(lhmpp, pose_estimator, dataset_pipeline, cfg) |
|
|
|
|
| if __name__ == "__main__": |
| launch_gradio_app() |
|
|