Spaces:

hongyu12321
/

RedFish

Sleeping

App Files Files Community

hongyu12321 committed on Sep 13, 2025

Commit

2187ded

verified ·

1 Parent(s): 165f68d

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -100

app.py CHANGED Viewed

@@ -1,16 +1,17 @@
-# app.py — Age-first + FAST cartoon (Turbo), nicer framing & magical background
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 import gradio as gr
 from PIL import Image, ImageDraw
 import numpy as np
 import torch
-# ------------------ Age estimator (Hugging Face) ------------------
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 HF_MODEL_ID = "nateraw/vit-age-classifier"
@@ -41,11 +42,16 @@ class PretrainedAgeEstimator:
                        for i, p in enumerate(probs))
         return expected, top
-# ------------------ Face detection with WIDER crop ------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
-    """Detect faces; return (cropped_wide, annotated). Adds margin so face isn't full screen."""
     def __init__(self, device: str | None = None, margin_scale: float = 1.8):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
@@ -56,49 +62,65 @@ class FaceCropper:
             return img.convert("RGB")
         return Image.fromarray(img).convert("RGB")
-    def detect_and_crop_wide(self, img, select="largest"):
         pil = self._ensure_pil(img)
         W, H = pil.size
         boxes, probs = self.mtcnn.detect(pil)
         annotated = pil.copy()
         draw = ImageDraw.Draw(annotated)
         if boxes is None or len(boxes) == 0:
             return None, annotated
-        # choose largest face
-        idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
-        if isinstance(select, int) and 0 <= select < len(boxes):
-            idx = select
-        x1, y1, x2, y2 = boxes[idx]
         # draw all boxes
         for b, p in zip(boxes, probs):
             bx1, by1, bx2, by2 = map(float, b)
             draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
             draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
-        # expand with margin
-        cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
-        w, h = (x2 - x1), (y2 - y1)
-        side = max(w, h) * self.margin_scale  # wider frame to include background/shoulders
-        # keep a pleasant portrait aspect (4:5)
-        target_w = side
-        target_h = side * 1.25
-        nx1 = int(max(0, cx - target_w/2))
-        nx2 = int(min(W, cx + target_w/2))
-        ny1 = int(max(0, cy - target_h/2))
-        ny2 = int(min(H, cy + target_h/2))
         crop = pil.crop((nx1, ny1, nx2, ny2))
         return crop, annotated
 # ------------------ FAST Cartoonizer (SD-Turbo) ------------------
 from diffusers import AutoPipelineForImage2Image
-# Turbo is very fast (1–4 steps). Great for stylization on CPU/GPU.
 TURBO_ID = "stabilityai/sd-turbo"
 def load_turbo_pipe(device):
@@ -107,20 +129,19 @@ def load_turbo_pipe(device):
         TURBO_ID,
         torch_dtype=dtype,
         safety_checker=None,
-    )
-    pipe = pipe.to(device)
     try:
         pipe.enable_attention_slicing()
     except Exception:
         pass
     return pipe
-# ------------------ Init models once ------------------
 age_est = PretrainedAgeEstimator()
-cropper = FaceCropper(device=age_est.device, margin_scale=1.8)  # 1.6–2.0 feels good
 sd_pipe = load_turbo_pipe(age_est.device)
-# ------------------ Prompts ------------------
 DEFAULT_POSITIVE = (
     "beautiful princess portrait, elegant gown, tiara, soft magical lighting, "
     "sparkles, dreamy castle background, painterly, clean lineart, vibrant but natural colors, "
@@ -131,52 +152,62 @@ DEFAULT_NEGATIVE = (
     "blurry, watermark, text, logo"
 )
-# ------------------ Helpers ------------------
 def _ensure_pil(img):
     return img if isinstance(img, Image.Image) else Image.fromarray(img)
 def _resize_512(im: Image.Image):
-    # keep aspect, fit longest side to 512 (faster, fewer artifacts)
     w, h = im.size
     scale = 512 / max(w, h)
     if scale < 1.0:
         im = im.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
     return im
-# ------------------ 1) Predict Age (fast) ------------------
 @torch.inference_mode()
-def predict_age_only(img, auto_crop=True):
     if img is None:
         return {}, "Please upload an image.", None
-    img = _ensure_pil(img).convert("RGB")
-    face_wide = None
-    annotated = None
     if auto_crop:
-        face_wide, annotated = cropper.detect_and_crop_wide(img)
-    target = face_wide if face_wide is not None else img
     age, top = age_est.predict(target, topk=5)
     probs = {lbl: float(p) for lbl, p in top}
-    summary = f"**Estimated age:** {age:.1f} years"
-    return probs, summary, (annotated if annotated is not None else img)
-# ------------------ 2) Generate Cartoon (fast) ------------------
 @torch.inference_mode()
-def generate_cartoon(img, prompt="", auto_crop=True, strength=0.5, steps=2, seed=-1):
     if img is None:
         return None
-    img = _ensure_pil(img).convert("RGB")
-    # use wide face crop to include background/shoulders
-    if auto_crop:
-        face_wide, _ = cropper.detect_and_crop_wide(img)
-        if face_wide is not None:
-            img = face_wide
-    img = _resize_512(img)
-    # prompt assembly
     user = (prompt or "").strip()
     pos = DEFAULT_POSITIVE if not user else f"{DEFAULT_POSITIVE}, {user}"
     neg = DEFAULT_NEGATIVE
@@ -185,47 +216,13 @@ def generate_cartoon(img, prompt="", auto_crop=True, strength=0.5, steps=2, seed
     if isinstance(seed, (int, float)) and int(seed) >= 0:
         generator = torch.Generator(device=age_est.device).manual_seed(int(seed))
-    # Turbo likes low steps and guidance ~0
-    out = sd_pipe(
-        prompt=pos,
-        negative_prompt=neg,
-        image=img,
-        strength=float(strength),          # 0.4–0.6 keeps identity & adds dress/background
-        guidance_scale=0.0,                # Turbo typically uses 0
-        num_inference_steps=int(steps),    # 1–4 steps → very fast
-        generator=generator,
-    )
-    return out.images[0]
-# ------------------ UI ------------------
-with gr.Blocks(title="Age First + Fast Cartoon") as demo:
-    gr.Markdown("# Upload or capture once — get age prediction first, then a faster cartoon ✨")
-    with gr.Row():
-        with gr.Column(scale=1):
-            img_in = gr.Image(sources=["upload", "webcam"], type="pil", label="Upload / Webcam")
-            auto = gr.Checkbox(True, label="Auto face crop (wide, recommended)")
-            prompt = gr.Textbox(
-                label="(Optional) Extra cartoon style",
-                placeholder="e.g., studio ghibli watercolor, soft bokeh, pastel palette"
-            )
-            with gr.Row():
-                strength = gr.Slider(0.3, 0.8, value=0.5, step=0.05, label="Cartoon strength")
-                steps = gr.Slider(1, 4, value=2, step=1, label="Turbo steps (1–4)")
-                seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
-            btn_age = gr.Button("Predict Age (fast)", variant="primary")
-            btn_cartoon = gr.Button("Make Cartoon (fast)", variant="secondary")
-        with gr.Column(scale=1):
-            probs_out = gr.Label(num_top_classes=5, label="Age Prediction (probabilities)")
-            age_md = gr.Markdown(label="Age Summary")
-            preview = gr.Image(label="Detection Preview")
-            cartoon_out = gr.Image(label="Cartoon Result")
-    # Wire the buttons
-    btn_age.click(fn=predict_age_only, inputs=[img_in, auto], outputs=[probs_out, age_md, preview])
-    btn_cartoon.click(fn=generate_cartoon, inputs=[img_in, prompt, auto, strength, steps, seed], outputs=cartoon_out)
-if __name__ == "__main__":
-    demo.launch()

+# app.py — Age-first + FAST group cartoons (SD-Turbo), single page
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+import math
 import gradio as gr
 from PIL import Image, ImageDraw
 import numpy as np
 import torch
+# ------------------ Age estimator ------------------
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 HF_MODEL_ID = "nateraw/vit-age-classifier"
                        for i, p in enumerate(probs))
         return expected, top
+# ------------------ Face detection (single & group) ------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
+    """
+    Detect faces.
+    - detect_one_wide: returns (crop_with_margin, annotated)
+    - detect_all_wide: returns (list[crops], annotated, list[boxes])
+    Boxes are (x1,y1,x2,y2) floats.
+    """
     def __init__(self, device: str | None = None, margin_scale: float = 1.8):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
             return img.convert("RGB")
         return Image.fromarray(img).convert("RGB")
+    def _expand_box(self, box, W, H, aspect=0.8):  # 4:5 portrait (w/h=0.8)
+        x1, y1, x2, y2 = box
+        cx, cy = (x1 + x2)/2, (y1 + y2)/2
+        w, h = (x2 - x1), (y2 - y1)
+        side = max(w, h) * self.margin_scale
+        tw = side
+        th = side / aspect  # make it taller than wide
+        nx1 = int(max(0, cx - tw/2)); nx2 = int(min(W, cx + tw/2))
+        ny1 = int(max(0, cy - th/2)); ny2 = int(min(H, cy + th/2))
+        return nx1, ny1, nx2, ny2
+    def detect_one_wide(self, img):
         pil = self._ensure_pil(img)
         W, H = pil.size
         boxes, probs = self.mtcnn.detect(pil)
         annotated = pil.copy()
         draw = ImageDraw.Draw(annotated)
         if boxes is None or len(boxes) == 0:
             return None, annotated
         # draw all boxes
         for b, p in zip(boxes, probs):
             bx1, by1, bx2, by2 = map(float, b)
             draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
             draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
+        # choose largest
+        idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
+        nx1, ny1, nx2, ny2 = self._expand_box(boxes[idx], W, H)
         crop = pil.crop((nx1, ny1, nx2, ny2))
         return crop, annotated
+    def detect_all_wide(self, img):
+        pil = self._ensure_pil(img)
+        W, H = pil.size
+        boxes, probs = self.mtcnn.detect(pil)
+        annotated = pil.copy()
+        draw = ImageDraw.Draw(annotated)
+        crops = []
+        ordered = []
+        if boxes is None or len(boxes) == 0:
+            return crops, annotated, []
+        for b, p in sorted(zip(boxes, probs), key=lambda x: (x[0][0]+x[0][2])/2):
+            bx1, by1, bx2, by2 = map(float, b)
+            draw.rectangle([bx1, by1, bx2, by2], outline=(0, 200, 255), width=3)
+            draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(0, 200, 255))
+            nx1, ny1, nx2, ny2 = self._expand_box(b, W, H)
+            crops.append(pil.crop((nx1, ny1, nx2, ny2)))
+            ordered.append((bx1, by1, bx2, by2))
+        return crops, annotated, ordered
 # ------------------ FAST Cartoonizer (SD-Turbo) ------------------
 from diffusers import AutoPipelineForImage2Image
 TURBO_ID = "stabilityai/sd-turbo"
 def load_turbo_pipe(device):
         TURBO_ID,
         torch_dtype=dtype,
         safety_checker=None,
+    ).to(device)
     try:
         pipe.enable_attention_slicing()
     except Exception:
         pass
     return pipe
+# init models once
 age_est = PretrainedAgeEstimator()
+cropper = FaceCropper(device=age_est.device, margin_scale=1.9)
 sd_pipe = load_turbo_pipe(age_est.device)
+# prompts
 DEFAULT_POSITIVE = (
     "beautiful princess portrait, elegant gown, tiara, soft magical lighting, "
     "sparkles, dreamy castle background, painterly, clean lineart, vibrant but natural colors, "
     "blurry, watermark, text, logo"
 )
 def _ensure_pil(img):
     """Return *img* as-is if it is a PIL image, else wrap it via ``Image.fromarray``."""
     if isinstance(img, Image.Image):
         return img
     return Image.fromarray(img)
 def _resize_512(im: Image.Image):
     """Shrink *im* so its longest side is at most 512 px, keeping aspect.

     Images whose longest side is already 512 px or less are returned
     unchanged; nothing is ever upscaled. That includes zero-sized images,
     which previously caused a division by zero.

     Each target dimension is clamped to at least 1 px. Without the clamp a
     very elongated image (e.g. 5120x1) would truncate its short side to 0
     and ``resize`` would reject the size.
     """
     w, h = im.size
     longest = max(w, h)
     if longest <= 512:
         return im
     scale = 512 / longest
     new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
     return im.resize(new_size, Image.LANCZOS)
+# ------------- AGE (single/group) -------------
 @torch.inference_mode()
 def predict_age(img, group_mode=False, auto_crop=True):
     """Estimate age for one face, or for every face in the picture.

     Args:
         img: PIL image or array accepted by ``_ensure_pil``; ``None``
             short-circuits with a prompt message.
         group_mode: when true, detect all faces and report one markdown
             table row per face, in the order returned by
             ``cropper.detect_all_wide``.
         auto_crop: single mode only. Crop to the detected face before
             estimating; it has no effect in group mode.

     Returns:
         ``(probs, markdown, preview)``. ``probs`` maps label to
         probability for the single face, or for face #1 in group mode.
         ``preview`` is the annotated detection image, or the plain RGB
         image when no detection was run or no face was found.
     """
     if img is None:
         return {}, "Please upload an image.", None
     pil = _ensure_pil(img).convert("RGB")
     if group_mode:
         crops, annotated, _ = cropper.detect_all_wide(pil)
         if not crops:
             # No face found: fall back to the whole image.
             age, top = age_est.predict(pil, topk=5)
             probs = {lbl: float(p) for lbl, p in top}
             md = f"**Estimated age (whole image):** {age:.1f} years"
             return probs, md, pil
         rows = ["| # | Age (yrs) | Top-1 | p |", "|---:|---:|---|---:|"]
         first_top = None
         for i, face in enumerate(crops, 1):
             # One model call per face. topk=5 serves both the table row
             # (top[0]) and the label dict below, so face #1 is not run
             # through the model a second time.
             # NOTE(review): assumes top[0] does not depend on topk; confirm
             # against PretrainedAgeEstimator.predict.
             age, top = age_est.predict(face, topk=5)
             if first_top is None:
                 first_top = top
             top1, p1 = top[0]
             rows.append(f"| {i} | {age:.1f} | {top1} | {p1:.2f} |")
         md = "\n".join(rows)
         # Only one distribution is returned, so use face #1. That is the
         # first crop in detect_all_wide's left-to-right order, which is
         # not necessarily the largest face.
         probs0 = {lbl: float(p) for lbl, p in first_top}
         return probs0, md, annotated
     # Single-face mode.
     face_wide = None
     annotated = None
     if auto_crop:
         face_wide, annotated = cropper.detect_one_wide(pil)
     target = face_wide if face_wide is not None else pil
     age, top = age_est.predict(target, topk=5)
     probs = {lbl: float(p) for lbl, p in top}
     md = f"**Estimated age:** {age:.1f} years"
     return probs, md, (annotated if annotated is not None else pil)
+# ------------- CARTOON (single/group) -------------
 @torch.inference_mode()
+def cartoonize(img, prompt="", group_mode=False, auto_crop=True, strength=0.5, steps=2, seed=-1):
     if img is None:
         return None
+    pil = _ensure_pil(img).convert("RGB")
     user = (prompt or "").strip()
     pos = DEFAULT_POSITIVE if not user else f"{DEFAULT_POSITIVE}, {user}"
     neg = DEFAULT_NEGATIVE
     if isinstance(seed, (int, float)) and int(seed) >= 0:
         generator = torch.Generator(device=age_est.device).manual_seed(int(seed))
+    if group_mode:
+        # detect all faces, stylize each, assemble grid
+        crops, _, _ = cropper.detect_all_wide(pil)
+        if not crops:
+            crops = [pil]  # fallback
+        # resize each to 384 for speed/variety
+        proc = []
+        for c in crops:
+            c = _resiz_