Spaces:

hongyu12321
/

RedFish

Sleeping

App Files Files Community

hongyu12321 committed on Sep 13, 2025

Commit

482599f

verified ·

1 Parent(s): f63b4cd

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -97

app.py CHANGED Viewed

@@ -1,27 +1,26 @@
-# app.py — Single-face: Age + Gender + Fast Cartoon (Queen/King/Fairy)
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-from typing import Optional
 import gradio as gr
 from PIL import Image, ImageDraw
 import numpy as np
 import torch
-# ------------------ Age estimator ------------------
 from transformers import AutoImageProcessor, AutoModelForImageClassification
-HF_AGE_ID = "nateraw/vit-age-classifier"
 AGE_RANGE_TO_MID = {
     "0-2": 1, "3-9": 6, "10-19": 15, "20-29": 25, "30-39": 35,
     "40-49": 45, "50-59": 55, "60-69": 65, "70+": 75
 }
-class AgeEstimator:
-    def __init__(self, model_id: str = HF_AGE_ID, device: Optional[str] = None):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.processor = AutoImageProcessor.from_pretrained(model_id, use_fast=True)
         self.model = AutoModelForImageClassification.from_pretrained(model_id)
@@ -42,51 +41,12 @@ class AgeEstimator:
                        for i, p in enumerate(probs))
         return expected, top
-# ------------------ Gender estimator (best-effort, optional) ------------------
-# We try to load a small HF gender classifier. If unavailable, we return "unknown".
-_GENDER_MODEL_IDS = [
-    "phiyodr/vit-gender-classification",        # (common community model)
-    "rizvandwiki/gender-classification",        # fallback
-]
-class GenderEstimator:
-    def __init__(self, device: Optional[str] = None):
-        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
-        self.model = None
-        self.processor = None
-        self.id2label = None
-        from transformers import AutoImageProcessor, AutoModelForImageClassification
-        for mid in _GENDER_MODEL_IDS:
-            try:
-                self.processor = AutoImageProcessor.from_pretrained(mid, use_fast=True)
-                self.model = AutoModelForImageClassification.from_pretrained(mid)
-                self.model.to(self.device).eval()
-                self.id2label = self.model.config.id2label
-                self.model_id = mid
-                break
-            except Exception:
-                self.processor = None
-                self.model = None
-                self.id2label = None
-        self.available = self.model is not None
-    @torch.inference_mode()
-    def predict(self, img: Image.Image):
-        if not self.available:
-            return "unknown", 0.0
-        if img.mode != "RGB":
-            img = img.convert("RGB")
-        inputs = self.processor(images=img, return_tensors="pt").to(self.device)
-        logits = self.model(**inputs).logits
-        probs = logits.softmax(dim=-1).squeeze(0)
-        score, idx = torch.max(probs, dim=0)
-        label = self.id2label[idx.item()]
-        return label, float(score.item())
-# ------------------ Largest-face detector with margin ------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
-    def __init__(self, device: Optional[str] = None, margin_scale: float = 1.9):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
         self.margin_scale = margin_scale
@@ -96,58 +56,59 @@ class FaceCropper:
             return img.convert("RGB")
         return Image.fromarray(img).convert("RGB")
-    def _expand_box(self, box, W, H, aspect=0.8):  # ~4:5 portrait
-        x1, y1, x2, y2 = box
-        cx, cy = (x1 + x2)/2, (y1 + y2)/2
-        w, h = (x2 - x1), (y2 - y1)
-        side = max(w, h) * self.margin_scale
-        tw = side
-        th = side / aspect
-        nx1 = int(max(0, cx - tw/2)); nx2 = int(min(W, cx + tw/2))
-        ny1 = int(max(0, cy - th/2)); ny2 = int(min(H, cy + th/2))
-        return nx1, ny1, nx2, ny2
-    def detect_largest_wide(self, img):
         pil = self._ensure_pil(img)
         W, H = pil.size
         boxes, probs = self.mtcnn.detect(pil)
         annotated = pil.copy()
         draw = ImageDraw.Draw(annotated)
         if boxes is None or len(boxes) == 0:
-            return None, annotated  # no detection
-        # draw all detections
         for b, p in zip(boxes, probs):
             bx1, by1, bx2, by2 = map(float, b)
-            draw.rectangle([bx1, by1, bx2, by2], outline=(0, 200, 255), width=3)
-            draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(0, 200, 255))
-        # pick largest box
-        idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
-        nx1, ny1, nx2, ny2 = self._expand_box(boxes[idx], W, H)
         crop = pil.crop((nx1, ny1, nx2, ny2))
         return crop, annotated
-# ------------------ FAST Cartoonizer (SD-Turbo, with safety) ------------------
 from diffusers import AutoPipelineForImage2Image
-from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
-from transformers import AutoFeatureExtractor
 TURBO_ID = "stabilityai/sd-turbo"
 def load_turbo_pipe(device):
-    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
     pipe = AutoPipelineForImage2Image.from_pretrained(
         TURBO_ID,
-        dtype=dtype,   # no deprecation warning
-    ).to(device)
-    # enable safety checker for public Space
-    pipe.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
-        "CompVis/stable-diffusion-safety-checker"
-    )
-    pipe.feature_extractor = AutoFeatureExtractor.from_pretrained(
-        "CompVis/stable-diffusion-safety-checker"
     )
     try:
         pipe.enable_attention_slicing()
     except Exception:
@@ -155,42 +116,116 @@ def load_turbo_pipe(device):
     return pipe
 # ------------------ Init models once ------------------
-age_est = AgeEstimator()
-gender_est = GenderEstimator(device=age_est.device)
-cropper = FaceCropper(device=age_est.device, margin_scale=1.9)
 sd_pipe = load_turbo_pipe(age_est.device)
 # ------------------ Prompts ------------------
-STYLE_BASE = {
-    "Queen": "regal queen portrait, elegant royal gown, jeweled tiara, ornate details, dreamy castle background, soft magical lighting, sparkles, storybook illustration, high quality",
-    "King": "regal king portrait, ornate royal cloak and crown, majestic posture, grand throne room background, cinematic soft lighting, painterly style, storybook illustration, high quality",
-    "Fairy": "fairy portrait, ethereal wings, glowing particles, enchanted forest background, luminous soft lighting, delicate dress, whimsical, storybook illustration, high quality",
-}
-NEGATIVE_PROMPT = "deformed, disfigured, ugly, extra limbs, extra fingers, bad anatomy, low quality, blurry, watermark, text, logo"
 def _ensure_pil(img):
     return img if isinstance(img, Image.Image) else Image.fromarray(img)
 def _resize_512(im: Image.Image):
     w, h = im.size
     scale = 512 / max(w, h)
     if scale < 1.0:
         im = im.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
     return im
-# ------------------ 1) Predict Age+Gender (fast) ------------------
 @torch.inference_mode()
-def predict_age_gender(img, auto_crop=True):
     if img is None:
         return {}, "Please upload an image.", None
-    pil = _ensure_pil(img).convert("RGB")
-    face_wide, annotated = (None, None)
     if auto_crop:
-        face_wide, annotated = cropper.detect_largest_wide(pil)
-    target = face_wide if face_wide is not None else pil
-    # age
     age, top = age_est.predict(target, topk=5)
     probs = {lbl: float(p) for lbl, p in top}

+# app.py — Age-first + FAST cartoon (Turbo), nicer framing & magical background
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 import gradio as gr
 from PIL import Image, ImageDraw
 import numpy as np
 import torch
+# ------------------ Age estimator (Hugging Face) ------------------
 from transformers import AutoImageProcessor, AutoModelForImageClassification
+HF_MODEL_ID = "nateraw/vit-age-classifier"
 AGE_RANGE_TO_MID = {
     "0-2": 1, "3-9": 6, "10-19": 15, "20-29": 25, "30-39": 35,
     "40-49": 45, "50-59": 55, "60-69": 65, "70+": 75
 }
+class PretrainedAgeEstimator:
+    def __init__(self, model_id: str = HF_MODEL_ID, device: str | None = None):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.processor = AutoImageProcessor.from_pretrained(model_id, use_fast=True)
         self.model = AutoModelForImageClassification.from_pretrained(model_id)
                        for i, p in enumerate(probs))
         return expected, top
+# ------------------ Face detection with WIDER crop ------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
+    """Detect faces; return (cropped_wide, annotated). Adds margin so face isn't full screen."""
+    def __init__(self, device: str | None = None, margin_scale: float = 1.8):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
         self.margin_scale = margin_scale
             return img.convert("RGB")
         return Image.fromarray(img).convert("RGB")
+    def detect_and_crop_wide(self, img, select="largest"):
         pil = self._ensure_pil(img)
         W, H = pil.size
         boxes, probs = self.mtcnn.detect(pil)
         annotated = pil.copy()
         draw = ImageDraw.Draw(annotated)
         if boxes is None or len(boxes) == 0:
+            return None, annotated
+        # choose largest face
+        idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
+        if isinstance(select, int) and 0 <= select < len(boxes):
+            idx = select
+        x1, y1, x2, y2 = boxes[idx]
+        # draw all boxes
         for b, p in zip(boxes, probs):
             bx1, by1, bx2, by2 = map(float, b)
+            draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
+            draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
+        # expand with margin
+        cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
+        w, h = (x2 - x1), (y2 - y1)
+        side = max(w, h) * self.margin_scale  # wider frame to include background/shoulders
+        # keep a pleasant portrait aspect (4:5)
+        target_w = side
+        target_h = side * 1.25
+        nx1 = int(max(0, cx - target_w/2))
+        nx2 = int(min(W, cx + target_w/2))
+        ny1 = int(max(0, cy - target_h/2))
+        ny2 = int(min(H, cy + target_h/2))
         crop = pil.crop((nx1, ny1, nx2, ny2))
         return crop, annotated
+# ------------------ FAST Cartoonizer (SD-Turbo) ------------------
 from diffusers import AutoPipelineForImage2Image
+# Turbo is very fast (1–4 steps). Great for stylization on CPU/GPU.
 TURBO_ID = "stabilityai/sd-turbo"
 def load_turbo_pipe(device):
+    dtype = torch.float16 if (device == "cuda") else torch.float32
     pipe = AutoPipelineForImage2Image.from_pretrained(
         TURBO_ID,
+        torch_dtype=dtype,
+        safety_checker=None,
     )
+    pipe = pipe.to(device)
     try:
         pipe.enable_attention_slicing()
     except Exception:
     return pipe
 # ------------------ Init models once ------------------
+# Models are created at import time. The face cropper and the diffusion pipeline
+# are placed on the same device the age estimator selected (age_est.device).
+age_est = PretrainedAgeEstimator()
+cropper = FaceCropper(device=age_est.device, margin_scale=1.8)  # 1.6–2.0 feels good
 sd_pipe = load_turbo_pipe(age_est.device)
 # ------------------ Prompts ------------------
+# Base positive prompt; generate_cartoon appends the user's optional style text to it.
+DEFAULT_POSITIVE = (
+    "beautiful princess portrait, elegant gown, tiara, soft magical lighting, "
+    "sparkles, dreamy castle background, painterly, clean lineart, vibrant but natural colors, "
+    "storybook illustration, high quality"
+)
+# Passed unchanged as negative_prompt by generate_cartoon.
+DEFAULT_NEGATIVE = (
+    "deformed, disfigured, ugly, extra limbs, extra fingers, bad anatomy, low quality, "
+    "blurry, watermark, text, logo"
+)
+# ------------------ Helpers ------------------
 def _ensure_pil(img):
+    """Return ``img`` as-is when it is already a PIL image, else build one via ``Image.fromarray``."""
     return img if isinstance(img, Image.Image) else Image.fromarray(img)
 def _resize_512(im: Image.Image):
+    """Downscale ``im`` so its longest side is at most 512 px.
+
+    Images whose longest side is already <= 512 are returned unchanged
+    (this helper never upscales). Resizing uses the LANCZOS filter.
+    """
+    # keep aspect, fit longest side to 512 (faster, fewer artifacts)
     w, h = im.size
     scale = 512 / max(w, h)
     if scale < 1.0:
+        # NOTE(review): int() truncates, so a very elongated image could end up
+        # with a 0-px short side — confirm such inputs cannot reach this point.
         im = im.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
     return im
+# ------------------ 1) Predict Age (fast) ------------------
 @torch.inference_mode()
+def predict_age_only(img, auto_crop=True):
+    """Estimate age for an uploaded image, optionally on the detected face crop.
+
+    Args:
+        img: Uploaded image (PIL image or array); ``None`` when nothing was uploaded.
+        auto_crop: When true, run the face cropper and predict on its wide crop
+            if a face is found.
+
+    Returns:
+        A 3-tuple ``(probs, summary, preview)``: a label -> probability dict built
+        from the ``topk=5`` results of ``age_est.predict``, a markdown summary
+        string, and the image to show as detection preview.
+    """
     if img is None:
         return {}, "Please upload an image.", None
+    img = _ensure_pil(img).convert("RGB")
+    face_wide = None
+    annotated = None
     if auto_crop:
+        face_wide, annotated = cropper.detect_and_crop_wide(img)
+    # Fall back to the full image when cropping is off or no face was detected.
+    target = face_wide if face_wide is not None else img
     age, top = age_est.predict(target, topk=5)
     probs = {lbl: float(p) for lbl, p in top}
+    summary = f"**Estimated age:** {age:.1f} years"
+    # Preview is the annotated detection image when the cropper ran, else the input.
+    return probs, summary, (annotated if annotated is not None else img)
+# ------------------ 2) Generate Cartoon (fast) ------------------
+@torch.inference_mode()
+def generate_cartoon(img, prompt="", auto_crop=True, strength=0.5, steps=2, seed=-1):
+    """Stylize an uploaded photo into a storybook cartoon with SD-Turbo img2img.
+
+    Args:
+        img: Uploaded image (PIL image or array); ``None`` when nothing was uploaded.
+        prompt: Optional extra style text appended to ``DEFAULT_POSITIVE``.
+        auto_crop: When true, use the cropper's wide face crop if a face is found.
+        strength: img2img strength forwarded to the pipeline.
+        steps: Requested inference steps. Raised when necessary so that
+            ``int(steps * strength) >= 1``, i.e. at least one denoising step runs.
+        seed: A non-negative number seeds the generator; a negative or
+            non-numeric value leaves generation unseeded.
+
+    Returns:
+        The first image produced by the pipeline, or ``None`` when ``img`` is ``None``.
+    """
+    import math  # local import: only this function needs it
+
+    if img is None:
+        return None
+    img = _ensure_pil(img).convert("RGB")
+    # use wide face crop to include background/shoulders
+    if auto_crop:
+        face_wide, _ = cropper.detect_and_crop_wide(img)
+        if face_wide is not None:
+            img = face_wide
+    img = _resize_512(img)
+    # prompt assembly
+    user = (prompt or "").strip()
+    pos = DEFAULT_POSITIVE if not user else f"{DEFAULT_POSITIVE}, {user}"
+    neg = DEFAULT_NEGATIVE
+    generator = None
+    if isinstance(seed, (int, float)) and int(seed) >= 0:
+        generator = torch.Generator(device=age_est.device).manual_seed(int(seed))
+    strength = float(strength)
+    steps = max(1, int(steps))
+    # img2img only runs int(steps * strength) denoising steps, and SD-Turbo
+    # requires steps * strength >= 1. The UI sliders allow combinations that
+    # violate this (e.g. steps=1 with strength=0.5), so raise steps to the
+    # smallest count that still yields one real step.
+    if 0.0 < strength < 1.0:
+        steps = max(steps, math.ceil(1.0 / strength))
+        if int(steps * strength) < 1:  # guard against float rounding
+            steps += 1
+    # Turbo likes low steps and guidance ~0
+    # NOTE(review): with guidance_scale=0.0 the negative prompt is presumably
+    # ignored by the pipeline (no classifier-free guidance) — confirm.
+    out = sd_pipe(
+        prompt=pos,
+        negative_prompt=neg,
+        image=img,
+        strength=strength,                 # 0.4–0.6 keeps identity & adds dress/background
+        guidance_scale=0.0,                # Turbo typically uses 0
+        num_inference_steps=steps,         # 1–4 steps → very fast
+        generator=generator,
+    )
+    return out.images[0]
+# ------------------ UI ------------------
+# Two-column layout: inputs and controls on the left, results on the right.
+with gr.Blocks(title="Age First + Fast Cartoon") as demo:
+    gr.Markdown("# Upload or capture once — get age prediction first, then a faster cartoon ✨")
+    with gr.Row():
+        with gr.Column(scale=1):
+            # type="pil" hands the callbacks a PIL image
+            img_in = gr.Image(sources=["upload", "webcam"], type="pil", label="Upload / Webcam")
+            auto = gr.Checkbox(True, label="Auto face crop (wide, recommended)")
+            prompt = gr.Textbox(
+                label="(Optional) Extra cartoon style",
+                placeholder="e.g., studio ghibli watercolor, soft bokeh, pastel palette"
+            )
+            with gr.Row():
+                # These three controls feed generate_cartoon's strength / steps / seed.
+                strength = gr.Slider(0.3, 0.8, value=0.5, step=0.05, label="Cartoon strength")
+                steps = gr.Slider(1, 4, value=2, step=1, label="Turbo steps (1–4)")
+                seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
+            btn_age = gr.Button("Predict Age (fast)", variant="primary")
+            btn_cartoon = gr.Button("Make Cartoon (fast)", variant="secondary")
+        with gr.Column(scale=1):
+            probs_out = gr.Label(num_top_classes=5, label="Age Prediction (probabilities)")
+            age_md = gr.Markdown(label="Age Summary")
+            preview = gr.Image(label="Detection Preview")
+            cartoon_out = gr.Image(label="Cartoon Result")
+    # Wire the buttons
+    # Each button runs independently on the same uploaded image: age prediction
+    # fills the label, summary and preview; cartoon generation fills cartoon_out.
+    btn_age.click(fn=predict_age_only, inputs=[img_in, auto], outputs=[probs_out, age_md, preview])
+    btn_cartoon.click(fn=generate_cartoon, inputs=[img_in, prompt, auto, strength, steps, seed], outputs=cartoon_out)
+# Launch the app only when the file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()