expression-editor

Running

File size: 11,110 Bytes

"""
Expression Editor on ZeroGPU.

Vendored from fofr/cog-expression-editor's underlying ComfyUI workflow
(LoadImage -> ExpressionEditor) but executed without ComfyUI: we clone the
PowerHouseMan/ComfyUI-AdvancedLivePortrait node at startup and stub the
two ComfyUI internals it imports (`folder_paths` and `comfy.utils`).

Weights auto-download to ./models on first run via the node's own loader
(Kijai/LivePortrait_safetensors + Bingsu/adetailer for the YOLO bbox).
"""
import os
import sys
import types
import subprocess

# ------------------------------------------------------------------
# 1. Pull the custom node + stub ComfyUI internals BEFORE importing it
# ------------------------------------------------------------------

# The checkout directory doubles as the package name, so it has to be a
# legal Python identifier: the repo's `nodes.py` uses relative imports
# (`from .LivePortrait...`), and those only resolve inside a real package.
CUSTOM_NODE_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "advanced_live_portrait",
)
_NODE_REPO_URL = "https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait.git"
if not os.path.exists(CUSTOM_NODE_DIR):
    # Nothing at the target path yet: fetch a shallow copy of the node repo.
    # `check_call` raises if git exits with a non-zero status.
    subprocess.check_call(
        ["git", "clone", "--depth=1", _NODE_REPO_URL, CUSTOM_NODE_DIR]
    )

# Writable locations the node expects, both resolved next to this file.
MODELS_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "models",
)
TEMP_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "temp",
)
# Create the two model subfolders and the temp folder up front; `exist_ok`
# keeps a restart from failing on directories left by a previous run.
for _required_dir in (
    os.path.join(MODELS_DIR, "liveportrait"),
    os.path.join(MODELS_DIR, "ultralytics"),
    TEMP_DIR,
):
    os.makedirs(_required_dir, exist_ok=True)

# Minimal `folder_paths` shim: a stand-in module registered in `sys.modules`
# so that a later `import folder_paths` resolves to it instead of failing.
def _shim_get_folder_paths(m):
    """Return a one-element list: the subfolder of MODELS_DIR named *m*."""
    return [os.path.join(MODELS_DIR, m)]


def _shim_get_save_image_path(f, d, *a, **k):
    """Echo the arguments back as the 5-tuple ``(d, f, 0, "", f)``.

    Extra positional/keyword arguments are accepted and ignored.
    NOTE(review): presumably mirrors ComfyUI's
    (folder, filename, counter, subfolder, prefix) layout -- confirm
    against the node's usage.
    """
    return (d, f, 0, "", f)


def _shim_get_temp_directory():
    """Return the app's temp directory."""
    return TEMP_DIR


def _shim_add_model_folder_path(*a, **k):
    """Accept any arguments and do nothing."""
    return None


_fp = types.ModuleType("folder_paths")
_fp.models_dir = MODELS_DIR
_fp.output_directory = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "outputs",
)
os.makedirs(_fp.output_directory, exist_ok=True)
_fp.get_folder_paths = _shim_get_folder_paths
_fp.get_save_image_path = _shim_get_save_image_path
_fp.get_temp_directory = _shim_get_temp_directory
_fp.add_model_folder_path = _shim_add_model_folder_path
sys.modules["folder_paths"] = _fp

# Minimal `comfy.utils` shim
import torch
import safetensors.torch

def _load_torch_file(ckpt, *args, **kwargs):
    """Load a checkpoint from *ckpt* and return whatever the loader yields.

    Paths ending in ``.safetensors`` go through ``safetensors.torch.load_file``;
    everything else goes through ``torch.load`` mapped to CPU. Extra positional
    and keyword arguments are accepted for call compatibility and ignored.
    """
    path = str(ckpt)
    if not path.endswith(".safetensors"):
        # weights_only=False unpickles arbitrary objects: only use on trusted files.
        return torch.load(path, map_location="cpu", weights_only=False)
    return safetensors.torch.load_file(path)


class _ProgressBar:
    """Progress bar stand-in whose constructor and update methods do nothing."""

    def _ignore(self, *a, **k):
        """Swallow any arguments and return None."""
        return None

    # All three entry points share the same no-op implementation.
    __init__ = _ignore
    update = _ignore
    update_absolute = _ignore


# Build the leaf module first, then hang it off its parent package.
_comfy_utils = types.ModuleType("comfy.utils")
_comfy_utils.load_torch_file = _load_torch_file
_comfy_utils.ProgressBar = _ProgressBar

_comfy = types.ModuleType("comfy")
# `import comfy.utils` followed by `comfy.utils.X` needs both the attribute
# on the parent module and the entries in sys.modules.
_comfy.utils = _comfy_utils

sys.modules.update({"comfy": _comfy, "comfy.utils": _comfy_utils})

# ------------------------------------------------------------------
# 2. Now import the node
# ------------------------------------------------------------------

# The directory containing this file is on sys.path when the file is run as a
# script (Python adds the script's directory), so the clone imports as a package
import spaces
import gradio as gr
import numpy as np
from PIL import Image

from advanced_live_portrait.nodes import ExpressionEditor, g_engine  # noqa: E402

# Preload pipeline + face detector at module scope so they land in the
# ZeroGPU snapshot. ZeroGPU forks per @spaces.GPU call; with the snapshot,
# the models are already resident in GPU memory on the worker and inference
# starts immediately. Loading them lazily inside the decorated function
# would re-download / re-init on every cold worker.
# NOTE(review): what the two `g_engine` calls actually download and load is
# defined in the cloned node package, not in this file; the inline notes
# below are the original author's description -- confirm against the node.
print("Preloading LivePortrait pipeline + YOLO detector for ZeroGPU snapshot...")
g_engine.get_pipeline()       # per original note: downloads + .to('cuda') the 5 LivePortrait modules
g_engine.get_detect_model()   # per original note: downloads + loads YOLO face bbox model
print("Preload done.")

# Single global editor, created once at import time and reused by every
# `edit_expression` call (original note: state is cached across calls).
_editor = ExpressionEditor()


def _pil_to_node_tensor(img: Image.Image) -> torch.Tensor:
    """Turn a PIL image into a float32 tensor with a leading batch axis.

    The image is converted to RGB when it is in any other mode, its pixel
    values are divided by 255.0, and a size-1 dimension is prepended
    (ComfyUI image tensors are [N, H, W, C] float32 in [0, 1]).
    """
    rgb = img if img.mode == "RGB" else img.convert("RGB")
    pixels = np.array(rgb, dtype=np.float32)
    normalized = pixels / 255.0
    return torch.from_numpy(normalized).unsqueeze(0)


def _node_tensor_to_pil(t: torch.Tensor) -> Image.Image:
    """Turn a node output tensor back into a PIL image.

    Drops dimension 0 when it has size 1, moves the data to a CPU numpy
    array, scales by 255, clips to [0, 255] and casts to uint8 (fractions
    are truncated by the cast).
    """
    frame = t.squeeze(0).detach().cpu().numpy()
    scaled = (frame * 255).clip(0, 255)
    as_bytes = scaled.astype(np.uint8)
    return Image.fromarray(as_bytes)


@spaces.GPU(duration=60)
def edit_expression(
    image, rotate_pitch, rotate_yaw, rotate_roll,
    blink, eyebrow, wink, pupil_x, pupil_y,
    aaa, eee, woo, smile,
    src_ratio, sample_ratio, sample_parts, crop_factor,
):
    """Run the shared ExpressionEditor node on *image* and return the result.

    Every control value except ``sample_parts`` is coerced with ``float()``
    before being handed to the node; ``sample_parts`` is passed through
    unchanged. The input image is converted with ``_pil_to_node_tensor`` and
    the first element of the node's ``"result"`` entry is converted back with
    ``_node_tensor_to_pil``.

    Raises:
        gr.Error: if *image* is None.
    """
    if image is None:
        raise gr.Error("Please upload an image.")

    source_tensor = _pil_to_node_tensor(image)

    # Collected in the same order the node call lists them, so float()
    # conversions (and any error they raise) happen in that order.
    raw_numeric = {
        "rotate_pitch": rotate_pitch,
        "rotate_yaw": rotate_yaw,
        "rotate_roll": rotate_roll,
        "blink": blink,
        "eyebrow": eyebrow,
        "wink": wink,
        "pupil_x": pupil_x,
        "pupil_y": pupil_y,
        "aaa": aaa,
        "eee": eee,
        "woo": woo,
        "smile": smile,
        "src_ratio": src_ratio,
        "sample_ratio": sample_ratio,
        "crop_factor": crop_factor,
    }
    node_kwargs = {key: float(val) for key, val in raw_numeric.items()}

    node_output = _editor.run(
        sample_parts=sample_parts,
        src_image=source_tensor,
        **node_kwargs,
    )
    # Per the original author's note, ExpressionEditor.run returns
    # {"ui": {...}, "result": (out_img, motion_link, exp_data)}.
    edited_tensor = node_output["result"][0]
    return _node_tensor_to_pil(edited_tensor)


# ------------------------------------------------------------------
# 3. Image preprocess (mirrors original: resize so max side <= 1024)
# ------------------------------------------------------------------

def preprocess_image(img: "Image.Image", max_side: int = 1024):
    """Downscale *img* so that neither side exceeds *max_side* pixels.

    Args:
        img: A PIL image, or None (e.g. when the upload was cleared).
        max_side: Largest allowed width/height in pixels. Defaults to 1024,
            the limit this app has always used.

    Returns:
        None when *img* is None. Otherwise an RGB image: the (possibly
        mode-converted) input itself when it already fits, or a
        LANCZOS-resampled copy whose longer side equals *max_side* and whose
        shorter side keeps the aspect ratio, never dropping below 1 pixel.
    """
    if img is None:
        return None
    if img.mode != "RGB":
        img = img.convert("RGB")
    w, h = img.size
    if w <= max_side and h <= max_side:
        return img
    # For extreme aspect ratios (e.g. 5000x1) the rounded short side would be
    # 0, which is not a valid resize target; clamp it to at least one pixel.
    if w >= h:
        new_w = max_side
        new_h = max(1, int(round(new_w / w * h)))
    else:
        new_h = max_side
        new_w = max(1, int(round(new_h / h * w)))
    return img.resize((new_w, new_h), Image.LANCZOS)


def reset_parameters():
    """Return a tuple of twelve ``gr.update(value=0)`` objects.

    One update per output component wired to the Reset button, each setting
    that component's value to 0.
    """
    slider_count = 12
    return tuple(gr.update(value=0) for _ in range(slider_count))


# ------------------------------------------------------------------
# 4. Gradio UI (mirrors fffiloni/expression-editor exactly)
# ------------------------------------------------------------------

# Page-level CSS passed to gr.Blocks: caps the element with id
# "col-container" at 800px wide and centres it horizontally.
css = """
#col-container{max-width: 800px; margin: 0 auto;}
"""

# Build the whole UI inside one Blocks context; components are declared in
# the order they should appear on the page.
with gr.Blocks(css=css, title="Expression Editor") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Expression Editor")
        gr.Markdown("Edit a face's expression with sliders. Uses the <a href='https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait' target='_blank'>Expression Editor ComfyUI node</a>, originally packaged by <a href='https://replicate.com/fofr' target='_blank'>fofr</a>.")

        with gr.Row():
            # Left column: input image, parameter tabs and action buttons.
            with gr.Column():
                # Upload-only input, delivered to callbacks as a PIL image.
                image_in = gr.Image(
                    label="Input image",
                    sources=["upload"],
                    type="pil",
                )

                # Head rotation controls, each in the range [-20, 20].
                with gr.Tab("HEAD"):
                    with gr.Column():
                        rotate_pitch = gr.Slider(label="Rotate Up-Down", value=0, minimum=-20, maximum=20)
                        rotate_yaw = gr.Slider(label="Rotate Left-Right turn", value=0, minimum=-20, maximum=20)
                        rotate_roll = gr.Slider(label="Rotate Left-Right tilt", value=0, minimum=-20, maximum=20)

                # Eye-related controls: eyebrow, blink/wink and pupil offsets.
                with gr.Tab("EYES"):
                    with gr.Column():
                        eyebrow = gr.Slider(label="Eyebrow", value=0, minimum=-10, maximum=15)
                        with gr.Row():
                            blink = gr.Slider(label="Blink", value=0, minimum=-20, maximum=5)
                            wink = gr.Slider(label="Wink", value=0, minimum=0, maximum=25)
                        with gr.Row():
                            pupil_x = gr.Slider(label="Pupil X", value=0, minimum=-15, maximum=15)
                            pupil_y = gr.Slider(label="Pupil Y", value=0, minimum=-15, maximum=15)

                # Mouth-shape controls plus the smile slider.
                with gr.Tab("MOUTH"):
                    with gr.Column():
                        with gr.Row():
                            aaa = gr.Slider(label="Aaa", value=0, minimum=-30, maximum=120)
                            eee = gr.Slider(label="Eee", value=0, minimum=-20, maximum=15)
                            woo = gr.Slider(label="Woo", value=0, minimum=-20, maximum=15)
                        smile = gr.Slider(label="Smile", value=0, minimum=-0.3, maximum=1.3)

                # Remaining node arguments. These are NOT part of the Reset
                # outputs and do not trigger a re-run on release (see below).
                with gr.Tab("More Settings"):
                    with gr.Column():
                        src_ratio = gr.Number(label="Src Ratio", info="Source ratio", value=1)
                        sample_ratio = gr.Slider(label="Sample Ratio", info="Sample ratio", value=1, minimum=-0.2, maximum=1.2)
                        sample_parts = gr.Dropdown(
                            choices=["OnlyExpression", "OnlyRotation", "OnlyMouth", "OnlyEyes", "All"],
                            value="OnlyExpression",
                            label="Sample parts",
                        )
                        crop_factor = gr.Slider(label="Crop Factor", info="Crop factor", value=1.7, minimum=1.5, maximum=2.5)

                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    submit_btn = gr.Button("Submit", variant="primary")

            # Right column: the edited image.
            with gr.Column():
                result_image = gr.Image(label="Output", elem_id="top")

    # Component list in the exact positional order of `edit_expression`'s
    # parameters; reordering either side would mis-route values.
    inputs = [
        image_in, rotate_pitch, rotate_yaw, rotate_roll,
        blink, eyebrow, wink, pupil_x, pupil_y,
        aaa, eee, woo, smile,
        src_ratio, sample_ratio, sample_parts, crop_factor,
    ]
    outputs = [result_image]

    # Resize on upload (matches original 1024-max preprocess): the uploaded
    # image is run through `preprocess_image` and written back to the input.
    image_in.upload(
        fn=preprocess_image,
        inputs=[image_in],
        outputs=[image_in],
        queue=False,
    )

    # Reset: zero the twelve expression controls (same order as the tuple
    # returned by `reset_parameters`), then re-run the edit with the new values.
    reset_btn.click(
        fn=reset_parameters,
        inputs=None,
        outputs=[
            rotate_pitch, rotate_yaw, rotate_roll,
            blink, eyebrow, wink, pupil_x, pupil_y,
            aaa, eee, woo, smile,
        ],
        queue=False,
    ).then(fn=edit_expression, inputs=inputs, outputs=outputs)

    # Submit: run the edit with the current control values.
    submit_btn.click(fn=edit_expression, inputs=inputs, outputs=outputs)

    # Regenerate on slider release (matches original's live-editing feel).
    # Only the twelve HEAD/EYES/MOUTH sliders are wired this way.
    for slider in (
        rotate_pitch, rotate_yaw, rotate_roll,
        blink, eyebrow, wink, pupil_x, pupil_y,
        aaa, eee, woo, smile,
    ):
        slider.release(fn=edit_expression, inputs=inputs, outputs=outputs, show_progress="minimal")


# Enable the request queue and start the server. This runs at import time:
# there is no `if __name__ == "__main__"` guard.
demo.queue().launch()