hongyu12321 committed on
Commit
7093eab
·
verified ·
1 Parent(s): 482599f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -46
app.py CHANGED
@@ -1,10 +1,11 @@
1
- # app.py β€” Age-first + FAST cartoon (Turbo), nicer framing & magical background
2
 
3
  import os
4
  os.environ["TRANSFORMERS_NO_TF"] = "1"
5
  os.environ["TRANSFORMERS_NO_FLAX"] = "1"
6
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
7
 
 
8
  import gradio as gr
9
  from PIL import Image, ImageDraw
10
  import numpy as np
@@ -20,7 +21,7 @@ AGE_RANGE_TO_MID = {
20
  }
21
 
22
  class PretrainedAgeEstimator:
23
- def __init__(self, model_id: str = HF_MODEL_ID, device: str | None = None):
24
  self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
25
  self.processor = AutoImageProcessor.from_pretrained(model_id, use_fast=True)
26
  self.model = AutoModelForImageClassification.from_pretrained(model_id)
@@ -41,12 +42,12 @@ class PretrainedAgeEstimator:
41
  for i, p in enumerate(probs))
42
  return expected, top
43
 
44
- # ------------------ Face detection with WIDER crop ------------------
45
  from facenet_pytorch import MTCNN
46
 
47
  class FaceCropper:
48
  """Detect faces; return (cropped_wide, annotated). Adds margin so face isn't full screen."""
49
- def __init__(self, device: str | None = None, margin_scale: float = 1.8):
50
  self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
51
  self.mtcnn = MTCNN(keep_all=True, device=self.device)
52
  self.margin_scale = margin_scale
@@ -56,7 +57,7 @@ class FaceCropper:
56
  return img.convert("RGB")
57
  return Image.fromarray(img).convert("RGB")
58
 
59
- def detect_and_crop_wide(self, img, select="largest"):
60
  pil = self._ensure_pil(img)
61
  W, H = pil.size
62
  boxes, probs = self.mtcnn.detect(pil)
@@ -65,13 +66,7 @@ class FaceCropper:
65
  draw = ImageDraw.Draw(annotated)
66
 
67
  if boxes is None or len(boxes) == 0:
68
- return None, annotated
69
-
70
- # choose largest face
71
- idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
72
- if isinstance(select, int) and 0 <= select < len(boxes):
73
- idx = select
74
- x1, y1, x2, y2 = boxes[idx]
75
 
76
  # draw all boxes
77
  for b, p in zip(boxes, probs):
@@ -79,11 +74,13 @@ class FaceCropper:
79
  draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
80
  draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
81
 
82
- # expand with margin
 
 
 
83
  cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
84
  w, h = (x2 - x1), (y2 - y1)
85
- side = max(w, h) * self.margin_scale # wider frame to include background/shoulders
86
- # keep a pleasant portrait aspect (4:5)
87
  target_w = side
88
  target_h = side * 1.25
89
 
@@ -95,8 +92,10 @@ class FaceCropper:
95
  crop = pil.crop((nx1, ny1, nx2, ny2))
96
  return crop, annotated
97
 
98
- # ------------------ FAST Cartoonizer (SD-Turbo) ------------------
99
  from diffusers import AutoPipelineForImage2Image
 
 
100
 
101
  # Turbo is very fast (1–4 steps). Great for stylization on CPU/GPU.
102
  TURBO_ID = "stabilityai/sd-turbo"
@@ -105,10 +104,16 @@ def load_turbo_pipe(device):
105
  dtype = torch.float16 if (device == "cuda") else torch.float32
106
  pipe = AutoPipelineForImage2Image.from_pretrained(
107
  TURBO_ID,
108
- torch_dtype=dtype,
109
- safety_checker=None,
110
  )
111
  pipe = pipe.to(device)
 
 
 
 
 
 
 
112
  try:
113
  pipe.enable_attention_slicing()
114
  except Exception:
@@ -117,16 +122,41 @@ def load_turbo_pipe(device):
117
 
118
  # ------------------ Init models once ------------------
119
  age_est = PretrainedAgeEstimator()
120
- cropper = FaceCropper(device=age_est.device, margin_scale=1.8) # 1.6–2.0 feels good
121
  sd_pipe = load_turbo_pipe(age_est.device)
122
 
123
- # ------------------ Prompts ------------------
124
- DEFAULT_POSITIVE = (
125
- "beautiful princess portrait, elegant gown, tiara, soft magical lighting, "
126
- "sparkles, dreamy castle background, painterly, clean lineart, vibrant but natural colors, "
127
- "storybook illustration, high quality"
128
- )
129
- DEFAULT_NEGATIVE = (
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  "deformed, disfigured, ugly, extra limbs, extra fingers, bad anatomy, low quality, "
131
  "blurry, watermark, text, logo"
132
  )
@@ -143,7 +173,41 @@ def _resize_512(im: Image.Image):
143
  im = im.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
144
  return im
145
 
146
- # ------------------ 1) Predict Age (fast) ------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  @torch.inference_mode()
148
  def predict_age_only(img, auto_crop=True):
149
  if img is None:
@@ -161,14 +225,14 @@ def predict_age_only(img, auto_crop=True):
161
  summary = f"**Estimated age:** {age:.1f} years"
162
  return probs, summary, (annotated if annotated is not None else img)
163
 
164
- # ------------------ 2) Generate Cartoon (fast) ------------------
165
  @torch.inference_mode()
166
- def generate_cartoon(img, prompt="", auto_crop=True, strength=0.5, steps=2, seed=-1):
 
167
  if img is None:
168
  return None
169
 
170
  img = _ensure_pil(img).convert("RGB")
171
- # use wide face crop to include background/shoulders
172
  if auto_crop:
173
  face_wide, _ = cropper.detect_and_crop_wide(img)
174
  if face_wide is not None:
@@ -176,45 +240,55 @@ def generate_cartoon(img, prompt="", auto_crop=True, strength=0.5, steps=2, seed
176
 
177
  img = _resize_512(img)
178
 
179
- # prompt assembly
180
- user = (prompt or "").strip()
181
- pos = DEFAULT_POSITIVE if not user else f"{DEFAULT_POSITIVE}, {user}"
182
- neg = DEFAULT_NEGATIVE
183
 
184
  generator = None
185
  if isinstance(seed, (int, float)) and int(seed) >= 0:
186
  generator = torch.Generator(device=age_est.device).manual_seed(int(seed))
187
 
188
- # Turbo likes low steps and guidance ~0
189
  out = sd_pipe(
190
- prompt=pos,
191
- negative_prompt=neg,
192
  image=img,
193
  strength=float(strength), # 0.4–0.6 keeps identity & adds dress/background
194
- guidance_scale=0.0, # Turbo typically uses 0
195
  num_inference_steps=int(steps), # 1–4 steps β†’ very fast
196
  generator=generator,
197
  )
198
  return out.images[0]
199
 
200
  # ------------------ UI ------------------
201
- with gr.Blocks(title="Age First + Fast Cartoon") as demo:
202
- gr.Markdown("# Upload or capture once β€” get age prediction first, then a faster cartoon ✨")
 
203
 
204
  with gr.Row():
205
  with gr.Column(scale=1):
206
  img_in = gr.Image(sources=["upload", "webcam"], type="pil", label="Upload / Webcam")
207
  auto = gr.Checkbox(True, label="Auto face crop (wide, recommended)")
208
- prompt = gr.Textbox(
209
- label="(Optional) Extra cartoon style",
210
- placeholder="e.g., studio ghibli watercolor, soft bokeh, pastel palette"
 
 
 
 
 
 
 
 
 
 
 
 
211
  )
 
212
  with gr.Row():
213
  strength = gr.Slider(0.3, 0.8, value=0.5, step=0.05, label="Cartoon strength")
214
  steps = gr.Slider(1, 4, value=2, step=1, label="Turbo steps (1–4)")
215
  seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
216
 
217
- btn_age = gr.Button("Predict Age (fast)", variant="primary")
218
  btn_cartoon = gr.Button("Make Cartoon (fast)", variant="secondary")
219
 
220
  with gr.Column(scale=1):
@@ -225,7 +299,15 @@ with gr.Blocks(title="Age First + Fast Cartoon") as demo:
225
 
226
  # Wire the buttons
227
  btn_age.click(fn=predict_age_only, inputs=[img_in, auto], outputs=[probs_out, age_md, preview])
228
- btn_cartoon.click(fn=generate_cartoon, inputs=[img_in, prompt, auto, strength, steps, seed], outputs=cartoon_out)
 
 
 
 
 
 
 
 
229
 
230
  if __name__ == "__main__":
231
- demo.launch()
 
1
+ # app.py β€” Age-first + FAST cartoon (Turbo) with prompt hint pickers (largest face only)
2
 
3
  import os
4
  os.environ["TRANSFORMERS_NO_TF"] = "1"
5
  os.environ["TRANSFORMERS_NO_FLAX"] = "1"
6
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
7
 
8
+ from typing import Optional
9
  import gradio as gr
10
  from PIL import Image, ImageDraw
11
  import numpy as np
 
21
  }
22
 
23
  class PretrainedAgeEstimator:
24
+ def __init__(self, model_id: str = HF_MODEL_ID, device: Optional[str] = None):
25
  self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
26
  self.processor = AutoImageProcessor.from_pretrained(model_id, use_fast=True)
27
  self.model = AutoModelForImageClassification.from_pretrained(model_id)
 
42
  for i, p in enumerate(probs))
43
  return expected, top
44
 
45
+ # ------------------ Face detection with WIDER crop (largest face) ------------------
46
  from facenet_pytorch import MTCNN
47
 
48
  class FaceCropper:
49
  """Detect faces; return (cropped_wide, annotated). Adds margin so face isn't full screen."""
50
+ def __init__(self, device: Optional[str] = None, margin_scale: float = 1.8):
51
  self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
52
  self.mtcnn = MTCNN(keep_all=True, device=self.device)
53
  self.margin_scale = margin_scale
 
57
  return img.convert("RGB")
58
  return Image.fromarray(img).convert("RGB")
59
 
60
+ def detect_and_crop_wide(self, img):
61
  pil = self._ensure_pil(img)
62
  W, H = pil.size
63
  boxes, probs = self.mtcnn.detect(pil)
 
66
  draw = ImageDraw.Draw(annotated)
67
 
68
  if boxes is None or len(boxes) == 0:
69
+ return None, annotated # no faces
 
 
 
 
 
 
70
 
71
  # draw all boxes
72
  for b, p in zip(boxes, probs):
 
74
  draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
75
  draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
76
 
77
+ # choose largest face
78
+ idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
79
+ x1, y1, x2, y2 = boxes[idx]
80
+ # expand with margin (4:5 portrait feel)
81
  cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
82
  w, h = (x2 - x1), (y2 - y1)
83
+ side = max(w, h) * self.margin_scale
 
84
  target_w = side
85
  target_h = side * 1.25
86
 
 
92
  crop = pil.crop((nx1, ny1, nx2, ny2))
93
  return crop, annotated
94
 
95
+ # ------------------ FAST Cartoonizer (SD-Turbo) with safety ------------------
96
  from diffusers import AutoPipelineForImage2Image
97
+ from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
98
+ from transformers import AutoFeatureExtractor
99
 
100
  # Turbo is very fast (1–4 steps). Great for stylization on CPU/GPU.
101
  TURBO_ID = "stabilityai/sd-turbo"
 
104
  dtype = torch.float16 if (device == "cuda") else torch.float32
105
  pipe = AutoPipelineForImage2Image.from_pretrained(
106
  TURBO_ID,
107
+ dtype=dtype, # βœ… use dtype (no deprecation warning)
 
108
  )
109
  pipe = pipe.to(device)
110
+ # Safety checker ON for public Spaces
111
+ pipe.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
112
+ "CompVis/stable-diffusion-safety-checker"
113
+ )
114
+ pipe.feature_extractor = AutoFeatureExtractor.from_pretrained(
115
+ "CompVis/stable-diffusion-safety-checker"
116
+ )
117
  try:
118
  pipe.enable_attention_slicing()
119
  except Exception:
 
122
 
123
  # ------------------ Init models once ------------------
124
  age_est = PretrainedAgeEstimator()
125
+ cropper = FaceCropper(device=age_est.device, margin_scale=1.85) # 1.6–2.0 feels good
126
  sd_pipe = load_turbo_pipe(age_est.device)
127
 
128
# ------------------ Prompt hint dictionaries ------------------
# Fixed option lists backing the Gradio pickers (Dropdown / CheckboxGroup).
# Order matters: it is the display order in the UI, and the first ROLE_CHOICES
# entry is the Dropdown default. String contents must stay stable because
# build_prompt() maps/joins these exact values into the SD prompt.

# Character archetype for the portrait (single-select Dropdown).
ROLE_CHOICES = [
    "Queen/Princess", "King/Prince", "Fairy", "Elf", "Knight", "Sorcerer/Sorceress",
    "Steampunk Royalty", "Cyberpunk Royalty", "Superhero", "Anime Protagonist"
]

# Scene/setting hints (multi-select).
BACKGROUND_CHOICES = [
    "grand castle hall", "castle balcony at sunset", "enchanted forest", "starry night sky",
    "throne room with banners", "crystal palace", "moonlit garden", "winter snow castle",
    "golden hour meadow", "mystical waterfall"
]

# Lighting-mood hints (multi-select).
LIGHTING_CHOICES = [
    "soft magical lighting", "golden hour rim light", "cinematic soft light",
    "glowing ambience", "volumetric light rays", "dramatic chiaroscuro"
]

# Rendering/art-style hints (multi-select).
ARTSTYLE_CHOICES = [
    "Disney/Pixar style", "Studio Ghibli watercolor", "cel-shaded cartoon",
    "storybook illustration", "painterly brush strokes", "anime lineart"
]

# Color-palette hints (multi-select).
COLOR_CHOICES = [
    "pastel palette", "vibrant colors", "warm tones", "cool tones",
    "iridescent highlights", "royal gold & sapphire"
]

# Wardrobe/accessory hints (multi-select).
OUTFIT_CHOICES = [
    "elegant gown", "ornate royal cloak", "jeweled tiara/crown",
    "silver diadem", "flowing cape", "intricate embroidery"
]

# Decorative particle/ambience hints (multi-select).
EFFECTS_CHOICES = [
    "sparkles", "soft bokeh background", "floating petals", "glowing particles",
    "butterflies", "magical aura"
]

# Shared negative prompt passed to the SD-Turbo pipeline for every generation.
NEGATIVE_PROMPT = (
    "deformed, disfigured, ugly, extra limbs, extra fingers, bad anatomy, low quality, "
    "blurry, watermark, text, logo"
)
 
173
  im = im.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
174
  return im
175
 
176
def build_prompt(role, background, lighting, artstyle, colors, outfit, effects, extra):
    """Assemble the positive SD prompt from the UI picker selections.

    Args:
        role: Selected role label (str) from the Dropdown, or falsy for none.
              Known labels are expanded to richer descriptors; unknown labels
              pass through verbatim.
        background, lighting, artstyle, colors, outfit, effects: Hint groups.
            Each is normally a list of strings from a CheckboxGroup; a bare
            string or any other iterable of strings is also accepted (fix:
            the previous version silently dropped non-list iterables).
        extra: Optional free-text user additions; surrounding whitespace is
            stripped and empty text is ignored.

    Returns:
        A single comma-separated prompt string. Quality/style anchors are
        always appended so the prompt is never empty.
    """
    # Role label -> richer base descriptor for the prompt.
    role_map = {
        "Queen/Princess": "regal queen/princess portrait",
        "King/Prince": "regal king/prince portrait",
        "Fairy": "ethereal fairy portrait with delicate wings",
        "Elf": "elven royalty portrait with elegant ears",
        "Knight": "valiant knight portrait in ornate armor",
        "Sorcerer/Sorceress": "mystical sorcerer portrait with arcane motifs",
        "Steampunk Royalty": "steampunk royal portrait with brass filigree",
        "Cyberpunk Royalty": "cyberpunk royal portrait with neon accents",
        "Superhero": "heroic comic-style portrait",
        "Anime Protagonist": "anime protagonist portrait"
    }

    bits = []
    if role:
        # Unknown roles fall through unchanged so custom values still work.
        bits.append(role_map.get(role, role))

    # Flatten each hint group into one comma-joined fragment.
    for group in (background, lighting, artstyle, colors, outfit, effects):
        if not group:
            continue
        if isinstance(group, str):
            bits.append(group)          # single string selection
        else:
            bits.append(", ".join(group))  # list/tuple/iterable of strings

    # Strong general quality/style anchors (always present).
    bits.append("clean lineart, storybook illustration, high quality")

    # Optional extra user text.
    extra_text = (extra or "").strip()
    if extra_text:
        bits.append(extra_text)

    return ", ".join(b for b in bits if b)
209
+
210
+ # ------------------ 1) Predict Age (fast, largest face) ------------------
211
  @torch.inference_mode()
212
  def predict_age_only(img, auto_crop=True):
213
  if img is None:
 
225
  summary = f"**Estimated age:** {age:.1f} years"
226
  return probs, summary, (annotated if annotated is not None else img)
227
 
228
+ # ------------------ 2) Generate Cartoon (fast, largest face) ------------------
229
  @torch.inference_mode()
230
+ def generate_cartoon(img, role, background, lighting, artstyle, colors, outfit, effects,
231
+ extra_desc, auto_crop=True, strength=0.5, steps=2, seed=-1):
232
  if img is None:
233
  return None
234
 
235
  img = _ensure_pil(img).convert("RGB")
 
236
  if auto_crop:
237
  face_wide, _ = cropper.detect_and_crop_wide(img)
238
  if face_wide is not None:
 
240
 
241
  img = _resize_512(img)
242
 
243
+ # prompt assembly from pickers
244
+ prompt = build_prompt(role, background, lighting, artstyle, colors, outfit, effects, extra_desc)
 
 
245
 
246
  generator = None
247
  if isinstance(seed, (int, float)) and int(seed) >= 0:
248
  generator = torch.Generator(device=age_est.device).manual_seed(int(seed))
249
 
 
250
  out = sd_pipe(
251
+ prompt=prompt,
252
+ negative_prompt=NEGATIVE_PROMPT,
253
  image=img,
254
  strength=float(strength), # 0.4–0.6 keeps identity & adds dress/background
255
+ guidance_scale=0.0, # Turbo commonly uses 0
256
  num_inference_steps=int(steps), # 1–4 steps β†’ very fast
257
  generator=generator,
258
  )
259
  return out.images[0]
260
 
261
  # ------------------ UI ------------------
262
+ with gr.Blocks(title="Age First + Fast Cartoon (with Hint Pickers)") as demo:
263
+ gr.Markdown("# Upload or capture once β€” get age prediction first, then a beautiful cartoon ✨")
264
+ gr.Markdown("Largest face is used if multiple people are present.")
265
 
266
  with gr.Row():
267
  with gr.Column(scale=1):
268
  img_in = gr.Image(sources=["upload", "webcam"], type="pil", label="Upload / Webcam")
269
  auto = gr.Checkbox(True, label="Auto face crop (wide, recommended)")
270
+
271
+ # --- Age first
272
+ btn_age = gr.Button("Predict Age (fast)", variant="primary")
273
+
274
+ gr.Markdown("### Cartoon Description Hints")
275
+ role = gr.Dropdown(choices=ROLE_CHOICES, value="Queen/Princess", label="Role")
276
+ background = gr.CheckboxGroup(choices=BACKGROUND_CHOICES, label="Background")
277
+ lighting = gr.CheckboxGroup(choices=LIGHTING_CHOICES, label="Lighting")
278
+ artstyle = gr.CheckboxGroup(choices=ARTSTYLE_CHOICES, label="Art Style")
279
+ colors = gr.CheckboxGroup(choices=COLOR_CHOICES, label="Color Mood")
280
+ outfit = gr.CheckboxGroup(choices=OUTFIT_CHOICES, label="Outfit / Accessories")
281
+ effects = gr.CheckboxGroup(choices=EFFECTS_CHOICES, label="Magical Effects")
282
+ extra = gr.Textbox(
283
+ label="Extra description (optional)",
284
+ placeholder="e.g., silver tiara, flowing gown, castle balcony at sunset"
285
  )
286
+
287
  with gr.Row():
288
  strength = gr.Slider(0.3, 0.8, value=0.5, step=0.05, label="Cartoon strength")
289
  steps = gr.Slider(1, 4, value=2, step=1, label="Turbo steps (1–4)")
290
  seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
291
 
 
292
  btn_cartoon = gr.Button("Make Cartoon (fast)", variant="secondary")
293
 
294
  with gr.Column(scale=1):
 
299
 
300
  # Wire the buttons
301
  btn_age.click(fn=predict_age_only, inputs=[img_in, auto], outputs=[probs_out, age_md, preview])
302
+ btn_cartoon.click(
303
+ fn=generate_cartoon,
304
+ inputs=[img_in, role, background, lighting, artstyle, colors, outfit, effects,
305
+ extra, auto, strength, steps, seed],
306
+ outputs=cartoon_out
307
+ )
308
+
309
+ # Expose app for HF Spaces
310
+ app = demo
311
 
312
  if __name__ == "__main__":
313
+ app.queue().launch()