DEVAN CHAUHAN committed on
Commit 2418377 · 1 Parent(s): 80e1925

[add] anime face detection and crop

Files changed (2)
  1. app.py +113 -54
  2. lbpcascade_animeface.xml +0 -0
app.py CHANGED
@@ -1,56 +1,94 @@
 import gradio as gr
 print("Loading models...")
-from retinaface import RetinaFace
-print("retinaface loaded")
+
 import cv2
-print("opencv loaded")
 import numpy as np
-print("numpy loaded")
 from PIL import Image
-print("PIL loaded")
 from rembg import remove
-print("rembg loaded")
 from sentence_transformers import SentenceTransformer
-print("sentence_transformers loaded")
+import urllib.request
+import pathlib
+
+print("Libraries loaded")
 
+# Load CLIP Model
 image_model = SentenceTransformer("clip-ViT-B-32")
 print("CLIP loaded")
 
-def get_image_embedding(image):
-    emb = image_model.encode(image)
-    return {"embedding": emb.tolist()}
 
-def process_image(input_image):
-    # Convert PIL → OpenCV
-    img = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
+# Load Anime Face Cascade
+def load_anime_model():
+    url = "https://raw.githubusercontent.com/nagadomi/lbpcascade_animeface/master/lbpcascade_animeface.xml"
+    path = pathlib.Path("lbpcascade_animeface.xml")
+
+    if not path.exists():
+        print("Downloading anime face model...")
+        urllib.request.urlretrieve(url, path.as_posix())
 
-    # Detect faces
-    faces = RetinaFace.detect_faces(img)
+    return cv2.CascadeClassifier(path.as_posix())
 
-    if not faces:
-        return "No face detected", None
 
-    face = list(faces.values())[0]
-    x1, y1, x2, y2 = face["facial_area"]
+# Load Human Face Cascade
+def load_human_model():
+    path = pathlib.Path(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
+    return cv2.CascadeClassifier(path.as_posix())
+
+
+anime_detector = load_anime_model()
+human_detector = load_human_model()
+
+print("Anime + Human detectors loaded")
+
+
+# Embedding Function
+def get_image_embedding(image):
+    emb = image_model.encode(image)
+    return {"embedding": emb.tolist()}
+
 
-    h, w, _ = img.shape
+# Face Crop + Background Remove
+def process_image(input_image, mode):
 
-    # Expand bounding box (hair included)
+    img = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    # Choose detector
+    if mode == "Anime":
+        detector = anime_detector
+    else:
+        detector = human_detector
+
+    faces = detector.detectMultiScale(
+        gray,
+        scaleFactor=1.1,
+        minNeighbors=5,
+        minSize=(24, 24)
+    )
+
+    if len(faces) == 0:
+        print("direct to background removal")
+        pil_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+        output = remove(pil_image)
+        output = output.resize((224, 224))
+
+        return "Success ✅", output
+
+    x, y, w, h = faces[0]
+
+    height, width, _ = img.shape
+
+    # Expand bounding box
     top_expand = 0.5
     side_expand = 0.3
     bottom_expand = 0.2
 
-    box_width = x2 - x1
-    box_height = y2 - y1
-
-    x1_new = int(max(0, x1 - box_width * side_expand))
-    x2_new = int(min(w, x2 + box_width * side_expand))
-    y1_new = int(max(0, y1 - box_height * top_expand))
-    y2_new = int(min(h, y2 + box_height * bottom_expand))
+    x1 = int(max(0, x - w * side_expand))
+    x2 = int(min(width, x + w + w * side_expand))
+    y1 = int(max(0, y - h * top_expand))
+    y2 = int(min(height, y + h + h * bottom_expand))
 
-    cropped = img[y1_new:y2_new, x1_new:x2_new]
+    cropped = img[y1:y2, x1:x2]
 
-    # Convert back to PIL
     pil_image = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
 
     # Background removal
@@ -62,31 +100,52 @@ def process_image(input_image):
     return "Success ✅", output
 
 
+# Gradio UI
 with gr.Blocks() as demo:
-    with gr.Tab("Image Embedding"):
+
+    with gr.Tab("Full Pipeline"):
+        mode_selector = gr.Dropdown(
+            choices=["Anime", "Human"],
+            value="Anime",
+            label="Detection Mode"
+        )
+
         img_input = gr.Image(type="pil")
-        img_output = gr.JSON()
-        img_btn = gr.Button("Generate")
-        img_btn.click(get_image_embedding, img_input, img_output)
-
-    with gr.Tab("Face Crop & Background Removal"):
-        face_input = gr.Image(type="pil")
-        face_output = gr.Image()
-        face_status = gr.Text()
-        face_btn = gr.Button("Process")
-        face_btn.click(process_image, face_input, [face_status, face_output])
-
-    with gr.Tab("Pipe"):
-        pipe_input = gr.Image(type="pil")
-        pipe_output = gr.JSON()
-        pipe_btn = gr.Button("Run Pipe")
-        def run_pipe(img):
-            status, processed_img = process_image(img)
-            if status != "Success ✅":
-                return {"status": status, "embedding": None}
-            return get_image_embedding(processed_img)
-
-        pipe_btn.click(run_pipe, pipe_input, pipe_output)
+        status = gr.Text()
+        img_output = gr.Image()
+        embedding_output = gr.JSON()
+
+        run_btn = gr.Button("Run Pipeline")
+
+        def run_pipeline(img, mode):
+            status_msg, processed_img = process_image(img, mode)
+            if status_msg != "Success ✅":
+                return status_msg, None, {"embedding": None}
+
+            embedding = get_image_embedding(processed_img)
+            return status_msg, processed_img, embedding
+
+        run_btn.click(
+            run_pipeline,
+            inputs=[img_input, mode_selector],
+            outputs=[status, img_output, embedding_output]
+        )
+
+    with gr.Tab("Embedding Only"):
+        img_input2 = gr.Image(type="pil")
+        embedding_output2 = gr.JSON()
+        run_btn2 = gr.Button("Get Embedding")
+
+        def get_embedding_only(img):
+            embedding = get_image_embedding(img)
+            return embedding
+
+        run_btn2.click(
+            get_embedding_only,
+            inputs=img_input2,
+            outputs=embedding_output2
+        )
+
 
 print("Launching demo...")
-demo.launch()
+demo.queue(max_size=15).launch()
lbpcascade_animeface.xml ADDED
The diff for this file is too large to render. See raw diff
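
The cascade-based crop path added to app.py above can be sanity-checked outside Gradio with a short standalone script. The sketch below is not part of the commit; it assumes opencv-python is installed, that lbpcascade_animeface.xml (the file added in this commit) sits in the working directory, and that a test image exists at the placeholder path sample.png. It reuses the same detectMultiScale parameters and expansion ratios as process_image().

# sanity_check.py - quick local test of the anime-face crop (sketch, not part of the commit)
import cv2

detector = cv2.CascadeClassifier("lbpcascade_animeface.xml")

img = cv2.imread("sample.png")  # placeholder test image path
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Same detection parameters as process_image() in app.py
faces = detector.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(24, 24))

if len(faces) == 0:
    print("no face found - app.py would fall back to plain background removal")
else:
    x, y, w, h = faces[0]
    height, width, _ = img.shape
    # Same expansion ratios as the committed code: 0.5 top (hair), 0.3 sides, 0.2 bottom
    x1 = int(max(0, x - w * 0.3))
    x2 = int(min(width, x + w + w * 0.3))
    y1 = int(max(0, y - h * 0.5))
    y2 = int(min(height, y + h + h * 0.2))
    cv2.imwrite("crop_preview.png", img[y1:y2, x1:x2])
    print("wrote crop_preview.png")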