InstantCharacter-OnePiece

Paused

App Files Files Community

Nad54 committed on Nov 10, 2025

Commit

a1aa9b4

verified ·

1 Parent(s): c6d931e

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -36

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import sys, os
 sys.path.append("../")
-# ---- anti-fragmentation VRAM, à définir AVANT toute init CUDA ----
 os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
 import spaces
@@ -66,7 +66,6 @@ FEMALE_PROMPT = (
 pipe = InstantCharacterFluxPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16)
 pipe.to(device)
-# Offload/slicing/tiling pour réduire les pics VRAM
 try:
     if hasattr(pipe, "enable_sequential_cpu_offload"):
         pipe.enable_sequential_cpu_offload()
@@ -85,7 +84,6 @@ pipe.init_adapter(
 # --------------------------------------------
 # Background remover
 # --------------------------------------------
-# On charge BiRefNet sur CPU; on le montera sur GPU juste pour l'inférence puis retour CPU.
 birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
 birefnet.to("cpu")
 birefnet.eval()
@@ -97,49 +95,41 @@ birefnet_transform = transforms.Compose([
 def remove_bkg(subject_image):
     def infer_matting(img_pil):
-        # move temporairement sur GPU si dispo
         run_dev = device if torch.cuda.is_available() else "cpu"
-        try:
-            birefnet.to(run_dev)
-        except Exception:
-            run_dev = "cpu"
-            birefnet.to("cpu")
         inp = birefnet_transform(img_pil).unsqueeze(0).to(run_dev)
         with torch.no_grad():
             preds = birefnet(inp)[-1].sigmoid().cpu()
         pred = preds[0].squeeze()
         mask = transforms.ToPILImage()(pred).resize(img_pil.size)
-        # libère VRAM : retour CPU + vidage cache
-        try:
-            birefnet.to("cpu")
-        except Exception:
-            pass
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         return np.array(mask)[..., None]
-    def pad_to_square(image, pad_value=255):
         H, W = image.shape[:2]
-        if H == W:
-            return image
-        pad = abs(H - W)
-        pad1, pad2 = pad // 2, pad - pad // 2
-        pad_param = ((0, 0), (pad1, pad2), (0, 0)) if H > W else ((pad1, pad2), (0, 0), (0, 0))
-        return np.pad(image, pad_param, "constant", constant_values=pad_value)
     mask = infer_matting(subject_image)[..., 0]
     subject_np = np.array(subject_image)
     mask = (mask > 128).astype(np.uint8) * 255
     sample_mask = np.stack([mask] * 3, axis=-1)
     obj = sample_mask / 255 * subject_np + (1 - sample_mask / 255) * 255
-    cropped = pad_to_square(obj, 255)
-    return Image.fromarray(cropped.astype(np.uint8))
 # --------------------------------------------
-# Simple gender detector (CLIP zero-shot)
 # --------------------------------------------
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
@@ -150,8 +140,8 @@ def detect_gender(img_pil: Image.Image) -> str:
     texts = ["a portrait photo of a man", "a portrait photo of a woman"]
     inputs = clip_processor(text=texts, images=img_pil.convert("RGB"), return_tensors="pt", padding=True).to(device)
     outputs = clip_model(**inputs)
-    logits_per_image = outputs.logits_per_image.squeeze(0)
-    idx = int(torch.argmax(logits_per_image).item())
     return "male" if idx == 0 else "female"
 # --------------------------------------------
@@ -162,7 +152,6 @@ def randomize_seed(seed, randomize):
 @spaces.GPU
 def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps, seed, style_mode, negative_prompt=""):
-    # purge VRAM avant d'attaquer
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
@@ -183,7 +172,7 @@ def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps
         negative_prompt=negative_prompt,
         num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
-        width=1024, height=1024,  # si OOM persiste, passe à 896 ou 768
         subject_image=input_image,
         subject_scale=scale,
         generator=generator,
@@ -194,13 +183,12 @@ def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps
     else:
         result = pipe(**common_args)
-    # purge VRAM après génération
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
     return result.images
 # --------------------------------------------
-# UI definition (Gradio 5)
 # --------------------------------------------
 def generate_fn(image, prompt, scale, style, guidance, steps, seed, randomize, negative_prompt, auto_prompt):
     if auto_prompt and image is not None:
@@ -212,15 +200,15 @@ def generate_fn(image, prompt, scale, style, guidance, steps, seed, randomize, n
 title = "🎨 InstantCharacter + One Piece LoRA"
 description = (
-    "Upload your photo, describe your scene, or tick **Auto One Piece Prompt** to auto-pick a gender-aware template. "
-    "Choose **One Piece style** to apply the LoRA."
 )
 demo = gr.Interface(
     fn=generate_fn,
     inputs=[
         gr.Image(label="Source Image", type="pil"),
-        gr.Textbox(label="Prompt", value=f", {ONEPIECE_TRIGGER}"),
         gr.Slider(0, 1.5, value=1.0, step=0.01, label="Scale"),
         gr.Dropdown(choices=[None, "Makoto Shinkai style", "Ghibli style", "One Piece style"],
                     value="One Piece style", label="Style"),

 import sys, os
 sys.path.append("../")
+# ---- anti-fragmentation VRAM ----
 os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
 import spaces
 pipe = InstantCharacterFluxPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16)
 pipe.to(device)
 try:
     if hasattr(pipe, "enable_sequential_cpu_offload"):
         pipe.enable_sequential_cpu_offload()
 # --------------------------------------------
 # Background remover
 # --------------------------------------------
 birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
 birefnet.to("cpu")
 birefnet.eval()
 def remove_bkg(subject_image):
     """Cut the subject out of its background and fit it on a 1024x768 canvas.

     The BiRefNet segmentation model produces a matte for ``subject_image``;
     pixels whose matte value is above 128 are kept, everything else is
     replaced by white. The composite is then padded with white to a 4:3
     aspect ratio and resized to 1024x768.

     Args:
         subject_image: PIL image of the subject. It is converted to RGB so
             that it always has the three channels the mask is stacked to.

     Returns:
         A 1024x768 RGB ``PIL.Image`` with the subject on a white background.
     """

     def infer_matting(img_pil):
         """Run BiRefNet on ``img_pil`` and return the matte as an (H, W, 1) array."""
         run_dev = device if torch.cuda.is_available() else "cpu"
         birefnet.to(run_dev)
         try:
             inp = birefnet_transform(img_pil).unsqueeze(0).to(run_dev)
             with torch.no_grad():
                 preds = birefnet(inp)[-1].sigmoid().cpu()
         finally:
             # Always move the model back to CPU and release cached VRAM,
             # even when inference fails, so the pipeline keeps its GPU memory.
             birefnet.to("cpu")
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
         pred = preds[0].squeeze()
         mask = transforms.ToPILImage()(pred).resize(img_pil.size)
         return np.array(mask)[..., None]

     def pad_to_ratio(image, target_w=1024, target_h=768, pad_value=255):
         """Pad an (H, W, C) array to target_w:target_h, then resize to that size.

         Padding is split evenly on both sides of the short axis and filled
         with ``pad_value`` so the subject stays centred and is not stretched.
         """
         H, W = image.shape[:2]
         # Integer cross-multiplication avoids float rounding when comparing
         # the current aspect ratio with the target one.
         if W * target_h < H * target_w:
             # Too narrow: widen to ceil(H * target_w / target_h).
             pad = -(-H * target_w // target_h) - W
             pad_param = ((0, 0), (pad // 2, pad - pad // 2), (0, 0))
         elif W * target_h > H * target_w:
             # Too wide: heighten to ceil(W * target_h / target_w).
             pad = -(-W * target_h // target_w) - H
             pad_param = ((pad // 2, pad - pad // 2), (0, 0), (0, 0))
         else:
             pad_param = None
         if pad_param is not None:
             image = np.pad(image, pad_param, "constant", constant_values=pad_value)
         img = Image.fromarray(image.astype(np.uint8))
         img = img.resize((target_w, target_h), Image.LANCZOS)
         return np.array(img)

     # Force three channels: RGBA or grayscale uploads would otherwise fail to
     # broadcast against the 3-channel mask below.
     subject_image = subject_image.convert("RGB")
     mask = infer_matting(subject_image)[..., 0]
     subject_np = np.array(subject_image)
     # Binarise the matte, then blend: subject where mask is set, white elsewhere.
     mask = (mask > 128).astype(np.uint8) * 255
     sample_mask = np.stack([mask] * 3, axis=-1)
     obj = sample_mask / 255 * subject_np + (1 - sample_mask / 255) * 255
     fixed = pad_to_ratio(obj, 1024, 768)
     return Image.fromarray(fixed.astype(np.uint8))
 # --------------------------------------------
+# Gender detector
 # --------------------------------------------
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
     texts = ["a portrait photo of a man", "a portrait photo of a woman"]
     inputs = clip_processor(text=texts, images=img_pil.convert("RGB"), return_tensors="pt", padding=True).to(device)
     outputs = clip_model(**inputs)
+    logits = outputs.logits_per_image.squeeze(0)
+    idx = int(torch.argmax(logits).item())
     return "male" if idx == 0 else "female"
 # --------------------------------------------
 @spaces.GPU
 def create_image(input_image, prompt, scale, guidance_scale, num_inference_steps, seed, style_mode, negative_prompt=""):
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         negative_prompt=negative_prompt,
         num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
+        width=1024, height=768,   # <<< résolution fixe
         subject_image=input_image,
         subject_scale=scale,
         generator=generator,
     else:
         result = pipe(**common_args)
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
     return result.images
 # --------------------------------------------
+# UI definition
 # --------------------------------------------
 def generate_fn(image, prompt, scale, style, guidance, steps, seed, randomize, negative_prompt, auto_prompt):
     if auto_prompt and image is not None:
 title = "🎨 InstantCharacter + One Piece LoRA"
 description = (
+    "Upload your photo and generate yourself as a One Piece character (output always 1024×768). "
+    "Tick **Auto One Piece Prompt** for gender-aware templates."
 )
 demo = gr.Interface(
     fn=generate_fn,
     inputs=[
         gr.Image(label="Source Image", type="pil"),
+        gr.Textbox(label="Prompt", value=f"a character is riding a bike in snow, {ONEPIECE_TRIGGER}"),
         gr.Slider(0, 1.5, value=1.0, step=0.01, label="Scale"),
         gr.Dropdown(choices=[None, "Makoto Shinkai style", "Ghibli style", "One Piece style"],
                     value="One Piece style", label="Style"),