InstantCharacter-OnePiece

Paused

App Files Files Community

Nad54 committed on Nov 11, 2025

Commit

5080fa8

verified ·

1 Parent(s): a1aa9b4

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -46

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
 import sys, os
 sys.path.append("../")
-# ---- anti-fragmentation VRAM ----
-os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
 import spaces
 import torch
 import random
@@ -65,16 +62,6 @@ FEMALE_PROMPT = (
 # --------------------------------------------
 pipe = InstantCharacterFluxPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16)
 pipe.to(device)
-try:
-    if hasattr(pipe, "enable_sequential_cpu_offload"):
-        pipe.enable_sequential_cpu_offload()
-    if hasattr(pipe, "vae"):
-        pipe.vae.enable_slicing()
-        pipe.vae.enable_tiling()
-except Exception:
-    pass
 pipe.init_adapter(
     image_encoder_path=image_encoder_path,
     image_encoder_2_path=image_encoder_2_path,
@@ -85,7 +72,7 @@ pipe.init_adapter(
 # Background remover
 # --------------------------------------------
 birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
-birefnet.to("cpu")
 birefnet.eval()
 birefnet_transform = transforms.Compose([
     transforms.Resize((1024, 1024)),
@@ -95,41 +82,32 @@ birefnet_transform = transforms.Compose([
 def remove_bkg(subject_image):
     def infer_matting(img_pil):
-        run_dev = device if torch.cuda.is_available() else "cpu"
-        birefnet.to(run_dev)
-        inp = birefnet_transform(img_pil).unsqueeze(0).to(run_dev)
         with torch.no_grad():
             preds = birefnet(inp)[-1].sigmoid().cpu()
         pred = preds[0].squeeze()
         mask = transforms.ToPILImage()(pred).resize(img_pil.size)
-        birefnet.to("cpu")
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
         return np.array(mask)[..., None]
-    def pad_to_ratio(image, target_w=1024, target_h=768, pad_value=255):
         H, W = image.shape[:2]
-        aspect_target = target_w / target_h
-        aspect = W / H
-        if abs(aspect - aspect_target) < 1e-3:
-            # déjà bon ratio
-            resized = Image.fromarray(image.astype(np.uint8)).resize((target_w, target_h), Image.LANCZOS)
-            return np.array(resized)
-        # centrer et crop/pad selon le ratio
-        img = Image.fromarray(image.astype(np.uint8))
-        img = img.resize((target_w, target_h), Image.LANCZOS)
-        return np.array(img)
     mask = infer_matting(subject_image)[..., 0]
     subject_np = np.array(subject_image)
     mask = (mask > 128).astype(np.uint8) * 255
     sample_mask = np.stack([mask] * 3, axis=-1)
     obj = sample_mask / 255 * subject_np + (1 - sample_mask / 255) * 255
-    fixed = pad_to_ratio(obj, 1024, 768)
-    return Image.fromarray(fixed.astype(np.uint8))
 # --------------------------------------------
-# Gender detector
 # --------------------------------------------
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
@@ -140,8 +118,8 @@ def detect_gender(img_pil: Image.Image) -> str:
     texts = ["a portrait photo of a man", "a portrait photo of a woman"]
     inputs = clip_processor(text=texts, images=img_pil.convert("RGB"), return_tensors="pt", padding=True).to(device)
     outputs = clip_model(**inputs)
-    logits = outputs.logits_per_image.squeeze(0)
-    idx = int(torch.argmax(logits).item())
     return "male" if idx == 0 else "female"
 # --------------------------------------------
@@ -152,9 +130,6 @@ def randomize_seed(seed, randomize):
 @spaces.GPU
 def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps, seed, style_mode, negative_prompt=""):
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
     input_image = remove_bkg(input_image)
     if style_mode == "Makoto Shinkai style":
@@ -172,7 +147,7 @@ def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps
         negative_prompt=negative_prompt,
         num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
-        width=1024, height=768,   # <<< résolution fixe
         subject_image=input_image,
         subject_scale=scale,
         generator=generator,
@@ -182,13 +157,10 @@ def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps
         result = pipe.with_style_lora(lora_file_path=lora_path, trigger=trigger, **common_args)
     else:
         result = pipe(**common_args)
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
     return result.images
 # --------------------------------------------
-# UI definition
 # --------------------------------------------
 def generate_fn(image, prompt, scale, style, guidance, steps, seed, randomize, negative_prompt, auto_prompt):
     if auto_prompt and image is not None:
@@ -200,8 +172,8 @@ def generate_fn(image, prompt, scale, style, guidance, steps, seed, randomize, n
 title = "🎨 InstantCharacter + One Piece LoRA"
 description = (
-    "Upload your photo and generate yourself as a One Piece character (output always 1024×768). "
-    "Tick **Auto One Piece Prompt** for gender-aware templates."
 )
 demo = gr.Interface(

 import sys, os
 sys.path.append("../")
 import spaces
 import torch
 import random
 # --------------------------------------------
 pipe = InstantCharacterFluxPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16)
 pipe.to(device)
 pipe.init_adapter(
     image_encoder_path=image_encoder_path,
     image_encoder_2_path=image_encoder_2_path,
 # Background remover
 # --------------------------------------------
 birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
+birefnet.to(device)
 birefnet.eval()
 birefnet_transform = transforms.Compose([
     transforms.Resize((1024, 1024)),
 def remove_bkg(subject_image):
     def infer_matting(img_pil):
+        inp = birefnet_transform(img_pil).unsqueeze(0).to(device)
         with torch.no_grad():
             preds = birefnet(inp)[-1].sigmoid().cpu()
         pred = preds[0].squeeze()
         mask = transforms.ToPILImage()(pred).resize(img_pil.size)
         return np.array(mask)[..., None]
+    def pad_to_square(image, pad_value=255):
         H, W = image.shape[:2]
+        if H == W:
+            return image
+        pad = abs(H - W)
+        pad1, pad2 = pad // 2, pad - pad // 2
+        pad_param = ((0, 0), (pad1, pad2), (0, 0)) if H > W else ((pad1, pad2), (0, 0), (0, 0))
+        return np.pad(image, pad_param, "constant", constant_values=pad_value)
     mask = infer_matting(subject_image)[..., 0]
     subject_np = np.array(subject_image)
     mask = (mask > 128).astype(np.uint8) * 255
     sample_mask = np.stack([mask] * 3, axis=-1)
     obj = sample_mask / 255 * subject_np + (1 - sample_mask / 255) * 255
+    cropped = pad_to_square(obj, 255)
+    return Image.fromarray(cropped.astype(np.uint8))
 # --------------------------------------------
+# Simple gender detector (CLIP zero-shot)
 # --------------------------------------------
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
     texts = ["a portrait photo of a man", "a portrait photo of a woman"]
     inputs = clip_processor(text=texts, images=img_pil.convert("RGB"), return_tensors="pt", padding=True).to(device)
     outputs = clip_model(**inputs)
+    logits_per_image = outputs.logits_per_image.squeeze(0)
+    idx = int(torch.argmax(logits_per_image).item())
     return "male" if idx == 0 else "female"
 # --------------------------------------------
 @spaces.GPU
 def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps, seed, style_mode, negative_prompt=""):
     input_image = remove_bkg(input_image)
     if style_mode == "Makoto Shinkai style":
         negative_prompt=negative_prompt,
         num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
+        width=1024, height=780,
         subject_image=input_image,
         subject_scale=scale,
         generator=generator,
         result = pipe.with_style_lora(lora_file_path=lora_path, trigger=trigger, **common_args)
     else:
         result = pipe(**common_args)
     return result.images
 # --------------------------------------------
+# UI definition (Gradio 5)
 # --------------------------------------------
 def generate_fn(image, prompt, scale, style, guidance, steps, seed, randomize, negative_prompt, auto_prompt):
     if auto_prompt and image is not None:
 title = "🎨 InstantCharacter + One Piece LoRA"
 description = (
+    "Upload your photo, describe your scene, or tick **Auto One Piece Prompt** to auto-pick a gender-aware template. "
+    "Choose **One Piece style** to apply the LoRA."
 )
 demo = gr.Interface(