InstantCharacter-OnePiece

Paused

App Files Files Community

Nad54 committed on Nov 7, 2025

Commit

7a3c654

verified ·

1 Parent(s): 4aad209

Update app.py

Browse files

Files changed (1) hide show

app.py +279 -167

app.py CHANGED Viewed

@@ -1,246 +1,358 @@
-import os
-# Nouveau nom d'env torch (l'ancien est déprécié)
-os.environ.setdefault("PYTORCH_ALLOC_CONF", "expandable_segments:True")
-# Évite plusieurs workers concurrent → moins de VRAM surprises
-os.environ.setdefault("GRADIO_NUM_WORKERS", "1")
-import sys
 sys.path.append('../')
 import spaces
 import torch
 import random
 import numpy as np
 from PIL import Image
-import gradio as gr
 from huggingface_hub import hf_hub_download
 from transformers import AutoModelForImageSegmentation
 from torchvision import transforms
 from pipeline import InstantCharacterFluxPipeline
-# =====================================================
-# CONFIG GÉNÉRALE
-# =====================================================
 MAX_SEED = np.iinfo(np.int32).max
-device = "cuda" if torch.cuda.is_available() else "cpu"
-dtype = torch.float16          # L40S: FP16 OK
-HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
-def need_token_guard():
-    if HF_TOKEN is None:
-        raise gr.Error(
-            "⚠️ Token manquant : ajoute un secret 'HF_TOKEN' (Settings → Repository secrets) "
-            "avec un token Hugging Face ayant accès à black-forest-labs/FLUX.1-dev."
-        )
-# =====================================================
-# TÉLÉCHARGEMENTS / MODÈLES
-# =====================================================
-base_model = "black-forest-labs/FLUX.1-dev"
-image_encoder_path = "google/siglip-so400m-patch14-384"
-# >>> On remplace le 'giant' par un modèle BEAUCOUP plus léger
-image_encoder_2_path = "facebook/dinov2-base"
-birefnet_path = "ZhengPeng7/BiRefNet"
-def _dl(repo_id, filename, token=None):
-    return hf_hub_download(repo_id=repo_id, filename=filename, token=token)
-need_token_guard()
-# IP-Adapter (≈5.6 Go)
-ip_adapter_path = _dl("tencent/InstantCharacter", "instantcharacter_ip-adapter.bin", HF_TOKEN)
-# >>> Ton LoRA One Piece (FLUX) <<<
-onepiece_flux_lora_path = "./onepiece_flux_v2.safetensors"
-onepiece_flux_trigger = "onepiece style"
-# =====================================================
-# INITIALISATION DU PIPELINE (tout GPU, pas d'offload CPU)
-# =====================================================
 pipe = InstantCharacterFluxPipeline.from_pretrained(
-    base_model,
-    torch_dtype=dtype,
-    token=HF_TOKEN,
-    low_cpu_mem_usage=True,  # réduit le pic RAM au chargement
 )
-# Tout sur GPU (L40S a de la marge)
 pipe.to(device)
-# xFormers si dispo
-try:
-    pipe.enable_xformers_memory_efficient_attention()
-except Exception:
-    pass
-pipe.set_progress_bar_config(disable=True)
-if hasattr(pipe, "vae"):
-    if hasattr(pipe.vae, "enable_slicing"):
-        pipe.vae.enable_slicing()
-    if hasattr(pipe.vae, "enable_tiling"):
-        pipe.vae.enable_tiling()
-# Adapter avec 2 encodeurs (le 2e est 'dinov2-base', léger)
 pipe.init_adapter(
     image_encoder_path=image_encoder_path,
     image_encoder_2_path=image_encoder_2_path,
-    subject_ipadapter_cfg=dict(subject_ip_adapter_path=ip_adapter_path, nb_token=512),  # 512 suffit et économise la RAM
 )
-# =====================================================
-# MATTEUR (BiRefNet) – SUR CPU POUR ÉCONOMISER LA VRAM
-# =====================================================
 birefnet = AutoModelForImageSegmentation.from_pretrained(
-    birefnet_path, trust_remote_code=True, token=HF_TOKEN
 )
-birefnet.to("cpu").eval()
 birefnet_transform_image = transforms.Compose([
     transforms.Resize((1024, 1024)),
     transforms.ToTensor(),
-    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
 ])
 def remove_bkg(subject_image):
     def infer_matting(img_pil):
-        imgs = birefnet_transform_image(img_pil).unsqueeze(0).to("cpu")
         with torch.no_grad():
-            preds = birefnet(imgs)[-1].sigmoid().cpu()
         pred = preds[0].squeeze()
-        mask = np.array(transforms.ToPILImage()(pred).resize(img_pil.size))
-        return mask[..., None]
     def get_bbox_from_mask(mask, th=128):
-        h, w = mask.shape[:2]
-        x = np.where(mask.max(0) >= th)[0]
-        y = np.where(mask.max(1) >= th)[0]
-        if len(x) == 0 or len(y) == 0:
-            return [0, 0, w, h]
-        return [int(x[0]), int(y[0]), int(x[-1]), int(y[-1])]
-    def pad_to_square(image, pad_value=255):
-        H, W = image.shape[:2]
         if H == W:
             return image
-        pad = abs(H - W)
-        pad1, pad2 = pad // 2, pad - pad // 2
         if H > W:
-            pad_param = ((0, 0), (pad1, pad2), (0, 0))
         else:
-            pad_param = ((pad1, pad2), (0, 0), (0, 0))
-        return np.pad(image, pad_param, "constant", constant_values=pad_value)
-    mask = infer_matting(subject_image)[..., 0]
-    x1, y1, x2, y2 = get_bbox_from_mask(mask)
-    subject_np = np.array(subject_image)
-    mask[mask > 128] = 255
-    mask[mask < 128] = 0
-    mask_3c = np.stack([mask] * 3, axis=-1)
-    obj = mask_3c / 255 * subject_np + (1 - mask_3c / 255) * 255
-    crop = obj[y1:y2, x1:x2]
-    crop = pad_to_square(crop)
-    return Image.fromarray(crop.astype(np.uint8))
-# =====================================================
-# OUTILS
-# =====================================================
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-    return random.randint(0, MAX_SEED) if randomize_seed else seed
-# =====================================================
-# GÉNÉRATION D'IMAGE
-# =====================================================
 @spaces.GPU
 def create_image(
     input_image,
     prompt,
-    scale,                 # force d'identité (IP-Adapter)
     guidance_scale,
     num_inference_steps,
     seed,
-    lora_strength=0.85,
-    width=896,
-    height=896,
 ):
-    if input_image is None:
-        raise gr.Error("Merci d'uploader une image de visage.")
-    if not os.path.exists(onepiece_flux_lora_path):
-        raise gr.Error(f"Fichier LoRA introuvable : {onepiece_flux_lora_path}. "
-                       f"Place-le à la racine du Space.")
-    # Détourage/crop du sujet (CPU)
     input_image = remove_bkg(input_image)
-    # Générateur sur GPU (évite mismatch)
-    generator = torch.Generator(device=device).manual_seed(int(seed))
-    images = pipe.with_style_lora(
-        lora_file_path=onepiece_flux_lora_path,
-        trigger=onepiece_flux_trigger,
-        prompt=prompt,
-        num_inference_steps=int(num_inference_steps),
-        guidance_scale=float(guidance_scale),
-        width=int(width),
-        height=int(height),
-        subject_image=input_image,
-        subject_scale=float(scale),
-        lora_scale=float(lora_strength),
-        generator=generator,
-    ).images
     return images
-# =====================================================
-# INTERFACE GRADIO
-# =====================================================
-title = "<h1 align='center'>InstantCharacter (FLUX.1-dev) + One Piece (FLUX LoRA)</h1>"
-description = (
-    "<b>GPU :</b> Nvidia L40S — pipeline FP16 tout-GPU (pas d'offload CPU).<br>"
-    "Encodeurs: SigLIP + DINOv2-BASE (léger). IP-Adapter nb_token=512 (éco RAM).<br>"
-    "Résolution par défaut : 896 × 896 (tu peux monter à 1024 si stable)."
-)
-# ⚠️ Gradio: pas de 'concurrency_count' sur certaines versions
-block = gr.Blocks(css="footer {visibility: hidden}").queue(max_size=5, api_open=False)
 with block:
-    gr.Markdown(title)
-    gr.Markdown(description)
     with gr.Row():
         with gr.Column():
-            image_pil = gr.Image(label="Source Image", type="pil")
             prompt = gr.Textbox(
                 label="Prompt",
-                value="onepiece style, a pirate character standing on a ship deck, shonen manga, strong black line art, cel shading, expressive eyes, dynamic pose, clean linework"
             )
-            scale = gr.Slider(0.0, 1.5, 1.0, 0.01, label="Scale (Face/ID strength)")
-            lora_strength = gr.Slider(0.0, 1.5, 0.85, 0.05, label="LoRA strength (One Piece)")
-            with gr.Accordion("Advanced Options", open=False):
-                guidance_scale = gr.Slider(1.0, 7.0, 3.5, 0.1, label="Guidance scale")
-                num_inference_steps = gr.Slider(5, 50, 28, 1, label="Inference steps")
-                seed = gr.Slider(-MAX_SEED, MAX_SEED, 123456, 1, label="Seed")
-                randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
-                width = gr.Slider(704, 1152, 896, 32, label="Width")
-                height = gr.Slider(704, 1152, 896, 32, label="Height")
-            generate_button = gr.Button("Generate Image", variant="primary")
         with gr.Column():
-            output_gallery = gr.Gallery(label="Generated Image").style(grid=[1], height=640)
     generate_button.click(
         fn=randomize_seed_fn,
         inputs=[seed, randomize_seed],
         outputs=seed,
         queue=False,
     ).then(
         fn=create_image,
         inputs=[
-            image_pil, prompt, scale, guidance_scale, num_inference_steps,
-            seed, lora_strength, width, height
         ],
-        outputs=output_gallery,
     )
-block.launch()

+import sys, os
 sys.path.append('../')
 import spaces
 import torch
 import random
 import numpy as np
 from PIL import Image
+import gradio as gr
 from huggingface_hub import hf_hub_download
 from transformers import AutoModelForImageSegmentation
 from torchvision import transforms
 from pipeline import InstantCharacterFluxPipeline
+# ----------------------------
+# Global
+# ----------------------------
+# Largest signed 32-bit integer; used as the upper bound for randomly drawn seeds.
 MAX_SEED = np.iinfo(np.int32).max
+# Run on CUDA when it is available, otherwise on the CPU.
+device = "cuda" if torch.cuda.is_available() else torch.device("cpu")
+# NOTE(review): `dtype` is not referenced anywhere else in the code visible in this
+# commit; the pipeline is loaded with torch.bfloat16 instead — confirm which is intended.
+dtype = torch.float16 if "cuda" in str(device) else torch.float32
+# ----------------------------
+# Pre-trained / assets paths
+# ----------------------------
+# Subject IP-Adapter checkpoint, downloaded from the Hugging Face Hub.
+ip_adapter_path = hf_hub_download(
+    repo_id="tencent/InstantCharacter",
+    filename="instantcharacter_ip-adapter.bin"
+)
+# Hub repository ids of the base model, the two image encoders and the matting model.
+base_model = 'black-forest-labs/FLUX.1-dev'
+image_encoder_path = 'google/siglip-so400m-patch14-384'
+image_encoder_2_path = 'facebook/dinov2-giant'
+birefnet_path = 'ZhengPeng7/BiRefNet'
+# Existing style LoRAs (downloaded from the Hub)
+makoto_style_lora_path = hf_hub_download(
+    repo_id="InstantX/FLUX.1-dev-LoRA-Makoto-Shinkai",
+    filename="Makoto_Shinkai_style.safetensors"
+)
+ghibli_style_lora_path = hf_hub_download(
+    repo_id="InstantX/FLUX.1-dev-LoRA-Ghibli",
+    filename="ghibli_style.safetensors"
+)
+# >>> NEW: One Piece LoRA (local file stored in the Space repository)
+# The path is resolved relative to this script's directory, so the file must sit
+# next to app.py.
+onepiece_style_lora_path = os.path.join(
+    os.path.dirname(__file__),
+    "onepiece_flux_v2.safetensors"
+)
+# Trigger phrase passed to the pipeline together with the One Piece LoRA and
+# appended to the example and default prompts.
+ONEPIECE_TRIGGER = "onepiece style"
+# ----------------------------
+# Init pipeline
+# ----------------------------
+# Load the base model weights in bfloat16, then move the pipeline to `device`.
 pipe = InstantCharacterFluxPipeline.from_pretrained(
+    base_model, torch_dtype=torch.bfloat16
 )
 pipe.to(device)
+# InstantCharacter adapters: both image encoders plus the subject IP-Adapter checkpoint.
+# NOTE(review): nb_token=1024 presumably has to match the downloaded checkpoint — confirm.
 pipe.init_adapter(
     image_encoder_path=image_encoder_path,
     image_encoder_2_path=image_encoder_2_path,
+    subject_ipadapter_cfg=dict(
+        subject_ip_adapter_path=ip_adapter_path,
+        nb_token=1024
+    ),
 )
+# ----------------------------
+# Matting model (background removal)
+# ----------------------------
+# trust_remote_code=True lets transformers execute model code shipped in the Hub repo.
 birefnet = AutoModelForImageSegmentation.from_pretrained(
+    birefnet_path, trust_remote_code=True
 )
+# CUDA when available, otherwise whatever `device` resolved to; inference mode only.
+birefnet.to('cuda' if torch.cuda.is_available() else device)
+birefnet.eval()
+# Preprocessing applied before matting: resize to 1024x1024, convert to a tensor,
+# then normalize each of the three channels with the mean/std listed below
+# (presumably the ImageNet statistics — confirm against the model card).
 birefnet_transform_image = transforms.Compose([
     transforms.Resize((1024, 1024)),
     transforms.ToTensor(),
+    transforms.Normalize(
+        [0.485, 0.456, 0.406],
+        [0.229, 0.224, 0.225]
+    )
 ])
 def remove_bkg(subject_image):
+    """Cut the salient subject out of ``subject_image`` onto a white square canvas.
+
+    The matting model predicts a foreground mask; background pixels are replaced
+    with white, the image is cropped to the mask's bounding box and padded with
+    white to a square.
+
+    Args:
+        subject_image: PIL image of the character. It is converted to RGB first so
+            that RGBA / grayscale uploads work with the 3-channel preprocessing.
+
+    Returns:
+        A square RGB ``PIL.Image`` containing only the subject on white.
+    """
     def infer_matting(img_pil):
+        """Return the predicted foreground mask as a (H, W, 1) uint8 array at the input size."""
+        input_images = birefnet_transform_image(img_pil).unsqueeze(0).to(
+            'cuda' if torch.cuda.is_available() else device
+        )
         with torch.no_grad():
+            # The last model output is used; sigmoid maps it to [0, 1].
+            preds = birefnet(input_images)[-1].sigmoid().cpu()
         pred = preds[0].squeeze()
+        pred_pil = transforms.ToPILImage()(pred)
+        # Undo the 1024x1024 resize so the mask lines up with the original image.
+        mask = pred_pil.resize(img_pil.size)
+        mask = np.array(mask)
+        mask = mask[..., None]
+        return mask
     def get_bbox_from_mask(mask, th=128):
+        """Return the inclusive bbox [x1, y1, x2, y2] of mask pixels >= ``th``.
+
+        Falls back to the whole image when no pixel reaches the threshold.
+        """
+        height, width = mask.shape[:2]
+        cols = np.flatnonzero(np.max(mask, axis=0) >= th)
+        rows = np.flatnonzero(np.max(mask, axis=1) >= th)
+        x1, x2 = (int(cols[0]), int(cols[-1])) if cols.size else (0, width - 1)
+        y1, y2 = (int(rows[0]), int(rows[-1])) if rows.size else (0, height - 1)
+        return [x1, y1, x2, y2]
+    def pad_to_square(image, pad_value=255, random=False):
+        """Pad the shorter side of an (H, W, C) array with ``pad_value`` to make it square.
+
+        The padding is centred unless ``random`` is true, in which case the split
+        between the two sides is drawn at random.
+        """
+        H, W = image.shape[0], image.shape[1]
         if H == W:
             return image
+        padd = abs(H - W)
+        padd_1 = int(np.random.randint(0, padd)) if random else int(padd / 2)
+        padd_2 = padd - padd_1
         if H > W:
+            pad_param = ((0, 0), (padd_1, padd_2), (0, 0))
         else:
+            pad_param = ((padd_1, padd_2), (0, 0), (0, 0))
+        image = np.pad(image, pad_param, 'constant', constant_values=pad_value)
+        return image
+    # The normalization and the compositing below both assume three channels.
+    subject_image = subject_image.convert("RGB")
+    salient_object_mask = infer_matting(subject_image)[..., 0]
+    x1, y1, x2, y2 = get_bbox_from_mask(salient_object_mask)
+    subject_image_np = np.array(subject_image)
+    # Binarize with the same ">= 128" rule used for the bounding box, so a pixel
+    # at exactly 128 is foreground instead of being left half-blended.
+    alpha = (salient_object_mask >= 128).astype(np.float32)[..., None]
+    obj_image = alpha * subject_image_np + (1 - alpha) * 255
+    # The bbox corners are inclusive, hence the +1 on the slice ends.
+    crop_obj_image = obj_image[y1:y2 + 1, x1:x2 + 1]
+    crop_pad_obj_image = pad_to_square(crop_obj_image, 255)
+    subject_image = Image.fromarray(crop_pad_obj_image.astype(np.uint8))
+    return subject_image
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+    """Return a random seed in [0, MAX_SEED] when ``randomize_seed`` is true, else ``seed`` unchanged."""
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    return seed
+def get_example():
+    """Return the example rows shown in the UI.
+
+    Each row is ``[image path, prompt, subject scale, style name]``, matching the
+    order of the inputs given to ``gr.Examples``.
+    """
+    case = [
+        [
+            "./assets/girl.jpg",
+            "A girl is playing a guitar in street, " + ONEPIECE_TRIGGER,
+            0.9,
+            'One Piece style',
+        ],
+        [
+            "./assets/boy.jpg",
+            "A boy is riding a bike in snow, " + ONEPIECE_TRIGGER,
+            0.9,
+            'One Piece style',
+        ],
+    ]
+    return case
+def run_for_examples(source_image, prompt, scale, style_mode):
+    """Generate images for an example row.
+
+    Forwards to ``create_image`` with fixed settings: guidance scale 3.5,
+    28 inference steps and seed 123456.
+    """
+    return create_image(
+        input_image=source_image,
+        prompt=prompt,
+        scale=scale,
+        guidance_scale=3.5,
+        num_inference_steps=28,
+        seed=123456,
+        style_mode=style_mode,
+    )
 @spaces.GPU
 def create_image(
     input_image,
     prompt,
+    scale,
     guidance_scale,
     num_inference_steps,
     seed,
+    style_mode=None
 ):
+    """Generate 1024x1024 images of the uploaded character following ``prompt``.
+
+    Args:
+        input_image: PIL image of the character; its background is removed first.
+        prompt: Text description of the scene.
+        scale: Subject (identity) strength passed as ``subject_scale``.
+        guidance_scale: Guidance scale forwarded to the pipeline.
+        num_inference_steps: Number of denoising steps.
+        seed: Seed for the torch random generator.
+        style_mode: One of 'Makoto Shinkai style', 'Ghibli style',
+            'One Piece style', or ``None`` / any other value for no style LoRA.
+
+    Returns:
+        The list of images produced by the pipeline.
+
+    Raises:
+        gr.Error: If no image was uploaded or the selected LoRA file is missing.
+    """
+    if input_image is None:
+        raise gr.Error("Please upload a source image.")
+    # Automatically remove the background and crop to the subject.
     input_image = remove_bkg(input_image)
+    # Style name -> (LoRA weights file, trigger phrase).
+    style_loras = {
+        'Makoto Shinkai style': (makoto_style_lora_path, 'Makoto Shinkai style'),
+        'Ghibli style': (ghibli_style_lora_path, 'ghibli style'),
+        'One Piece style': (onepiece_style_lora_path, ONEPIECE_TRIGGER),
+    }
+    # UI sliders may deliver floats; coerce to the types the pipeline expects.
+    common_kwargs = dict(
+        prompt=prompt,
+        num_inference_steps=int(num_inference_steps),
+        guidance_scale=float(guidance_scale),
+        width=1024,
+        height=1024,
+        subject_image=input_image,
+        subject_scale=float(scale),
+        generator=torch.manual_seed(int(seed)),
+    )
+    style = style_loras.get(style_mode)
+    if style is None:
+        # No style selected (or an unknown one): plain generation without LoRA.
+        images = pipe(**common_kwargs).images
+    else:
+        lora_file_path, trigger = style
+        if not os.path.isfile(lora_file_path):
+            raise gr.Error(f"LoRA file not found: {lora_file_path}")
+        images = pipe.with_style_lora(
+            lora_file_path=lora_file_path,
+            trigger=trigger,
+            **common_kwargs,
+        ).images
     return images
+# ----------------------------
+# UI
+# ----------------------------
+# Page heading (raw HTML rendered through gr.Markdown).
+title = r"""
+<h1 align="center">InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</h1>
+"""
+# Usage instructions shown under the heading.
+description = r"""
+<b>Official 🤗 Gradio demo</b> for <a href='https://instantcharacter.github.io/' target='_blank'><b>InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</b></a>.<br>
+How to use:<br>
+1. Upload a character image, removing background would be preferred.
+2. Enter a text prompt to describe what you hope the character does.
+3. Choose a style (e.g., <code>One Piece style</code>).
+4. Click <b>Generate Image</b>.
+"""
+# Citation footer. The bibtex fence and the raw string must both be closed,
+# otherwise the rest of the file is swallowed by the string literal.
+article = r"""
+---
+📝 **Citation**
+<br>
+If our work is helpful for your research or applications, please cite us via:
+```bibtex
+@article{tao2025instantcharacter,
+  title={InstantCharacter: Personalize Any Characters with a Scalable Diffusion Transformer Framework},
+  author={Tao, Jiale and Zhang, Yanbing and Wang, Qixun and Cheng, Yiji and Wang, Haofan and Bai, Xu and Zhou, Zhengguang and Li, Ruihuang and Wang, Linqing and Wang, Chunyu and others},
+  journal={arXiv preprint arXiv:2504.12395},
+  year={2025}
+}
+```
+"""
+# Build the Gradio UI. Every component, the event wiring and the examples must be
+# created inside the `with block:` context to be part of the app.
+block = gr.Blocks(css="footer {visibility: hidden}").queue(max_size=10, api_open=False)
 with block:
+    gr.Markdown(title)
+    gr.Markdown(description)
     with gr.Row():
         with gr.Column():
+            with gr.Row():
+                with gr.Column():
+                    image_pil = gr.Image(label="Source Image", type='pil')
+            # Tip: include the trigger phrase in the prompt if needed.
             prompt = gr.Textbox(
                 label="Prompt",
+                value=f"a character is riding a bike in snow, {ONEPIECE_TRIGGER}"
             )
+            scale = gr.Slider(minimum=0, maximum=1.5, step=0.01, value=1.0, label="Scale")
+            style_mode = gr.Dropdown(
+                label='Style',
+                choices=[None, 'Makoto Shinkai style', 'Ghibli style', 'One Piece style'],
+                value='One Piece style'
+            )
+            with gr.Accordion(open=False, label="Advanced Options"):
+                guidance_scale = gr.Slider(minimum=1, maximum=7.0, step=0.01, value=3.5, label="guidance scale")
+                num_inference_steps = gr.Slider(minimum=5, maximum=50.0, step=1.0, value=28, label="num inference steps")
+                # The upper bound must cover the range randomize_seed_fn draws from
+                # (0..MAX_SEED), since its result is written back into this slider.
+                seed = gr.Slider(minimum=-1000000, maximum=MAX_SEED, value=123456, step=1, label="Seed Value")
+                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+            generate_button = gr.Button("Generate Image")
         with gr.Column():
+            generated_image = gr.Gallery(label="Generated Image")
     generate_button.click(
         fn=randomize_seed_fn,
         inputs=[seed, randomize_seed],
         outputs=seed,
         queue=False,
+        api_name=False,
     ).then(
         fn=create_image,
         inputs=[
+            image_pil,
+            prompt,
+            scale,
+            guidance_scale,
+            num_inference_steps,
+            seed,
+            style_mode,
         ],
+        outputs=[generated_image]
     )
+    gr.Examples(
+        examples=get_example(),
+        inputs=[image_pil, prompt, scale, style_mode],
+        fn=run_for_examples,
+        outputs=[generated_image],
+        cache_examples=True,
+    )
+    gr.Markdown(article)
+block.launch()