Update app.py
app.py
CHANGED
[Original-file pane of the diff viewer; its deleted lines were garbled in extraction. Recoverable removals: scattered blank lines; the multi-line docstrings of _ensure_rgb_uint8 ("Gradio provides images as numpy arrays in RGB order with dtype uint8 by default, but we defensively normalize here in case inputs vary"), _detect_face_bbox_mediapipe, _compute_skin_color_hex ("Uses median across masked pixels to reduce influence of highlights/shadows"), and detect_skin_tone, each condensed to one line; and the old bodies of the face-detection, color-computation, UI, and launch code, rewritten in the updated file below.]

from typing import Tuple, Optional, List, Dict
import cv2
import gradio as gr
import numpy as np
from PIL import Image
import torch
from functools import lru_cache
from transformers import AutoImageProcessor, AutoModelForSemanticSegmentation
import mediapipe as mp  # MediaPipe is mandatory
import warnings

warnings.filterwarnings('ignore')


def _ensure_rgb_uint8(image: np.ndarray) -> np.ndarray:
    """Convert an input image array to RGB uint8 format."""
    if image is None:
        raise ValueError("No image provided")
    # ... unchanged normalization body elided by the diff ...
    return image
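
# The normalization body above is elided as unchanged context. A minimal sketch of
# what such a helper typically does (an assumption, not the file's actual lines):
#
#     if image.ndim == 2:                      # grayscale -> RGB
#         image = np.stack([image] * 3, axis=-1)
#     if image.shape[-1] == 4:                 # RGBA -> RGB
#         image = image[..., :3]
#     if image.dtype != np.uint8:              # floats in [0, 1] -> uint8
#         image = np.clip(image * 255.0 if image.max() <= 1.0 else image, 0, 255).astype(np.uint8)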


def _preprocess_image(image: np.ndarray) -> np.ndarray:
    """Preprocess image to improve face detection."""
    rgb = _ensure_rgb_uint8(image)

    # Resize if image is too large or too small
    h, w = rgb.shape[:2]

    # If too large, resize down
    max_dim = 1024
    if max(h, w) > max_dim:
        scale = max_dim / max(h, w)
        new_w = int(w * scale)
        new_h = int(h * scale)
        rgb = cv2.resize(rgb, (new_w, new_h), interpolation=cv2.INTER_AREA)
        h, w = rgb.shape[:2]  # refresh dimensions so the upscale check uses the resized image

    # If too small, resize up
    min_dim = 200
    if min(h, w) < min_dim:
        scale = min_dim / min(h, w)
        new_w = int(w * scale)
        new_h = int(h * scale)
        rgb = cv2.resize(rgb, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # Apply contrast enhancement (CLAHE on the L channel) if the image is dark
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    if np.mean(gray) < 50:  # Too dark
        lab = cv2.cvtColor(rgb, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        lab = cv2.merge((l, a, b))
        rgb = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

    return rgb

def _central_crop_bbox(width: int, height: int, frac: float = 0.6) -> Tuple[int, int, int, int]:
    """Return a central crop bounding box (x1, y1, x2, y2) covering `frac` of width/height."""
    frac = float(np.clip(frac, 0.2, 1.0))
    # ... unchanged remainder elided by the diff ...
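
# The remainder is elided as unchanged context. A minimal sketch of the usual
# center-crop arithmetic (an assumption, not the file's actual lines):
#
#     crop_w, crop_h = int(width * frac), int(height * frac)
#     x1 = (width - crop_w) // 2
#     y1 = (height - crop_h) // 2
#     return x1, y1, x1 + crop_w, y1 + crop_h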


def _detect_face_bbox_mediapipe(image_rgb: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
    """Detect a face bounding box using MediaPipe Face Detection and return (x1, y1, x2, y2)."""
    try:
        height, width = image_rgb.shape[:2]

        # Initialize MediaPipe Face Detection
        mp_face_detection = mp.solutions.face_detection
        face_detection = mp_face_detection.FaceDetection(
            model_selection=1,  # 0 for short-range (close-ups), 1 for full-range
            min_detection_confidence=0.3  # Lower threshold to catch more faces
        )

        # MediaPipe Face Detection expects RGB input, which we already have
        results = face_detection.process(image_rgb)
        face_detection.close()

        if not results.detections:
            return None

        # Collect all valid detections
        detections = []
        for detection in results.detections:
            bbox = detection.location_data.relative_bounding_box
            confidence = detection.score[0]

            # Convert normalized coordinates to pixel coordinates
            x = int(bbox.xmin * width)
            y = int(bbox.ymin * height)
            w = int(bbox.width * width)
            h = int(bbox.height * height)

            # Ensure coordinates are within image bounds
            x = max(0, x)
            y = max(0, y)
            w = min(width - x, w)
            h = min(height - y, h)

            if w > 0 and h > 0:
                detections.append({
                    'bbox': (x, y, w, h),
                    'confidence': confidence
                })

        if not detections:
            return None

        # Sort by confidence and pick the best
        detections.sort(key=lambda d: d['confidence'], reverse=True)
        best = detections[0]
        x, y, w, h = best['bbox']

        # Expand the bounding box to include more context
        expand_x = int(w * 0.15)
        expand_y = int(h * 0.20)

        x1 = max(0, x - expand_x)
        y1 = max(0, y - expand_y)
        x2 = min(width, x + w + expand_x)
        y2 = min(height, y + h + expand_y)

        # Ensure minimum size; if too small, let the caller fall back to a central crop
        if (x2 - x1) < 50 or (y2 - y1) < 50:
            return None

        return x1, y1, x2, y2

    except Exception as e:
        print(f"MediaPipe error: {e}")
        return None


def _detect_face_bbox_opencv(image_rgb: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
    """Fallback face detection using OpenCV Haar cascades."""
    try:
        gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)

        # Load pre-trained Haar cascade
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        face_cascade = cv2.CascadeClassifier(cascade_path)

        if face_cascade.empty():
            print("Haar cascade not loaded properly")
            return None

        # Detect faces
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        if len(faces) == 0:
            return None

        # Pick the largest face
        faces = sorted(faces, key=lambda f: f[2] * f[3], reverse=True)
        x, y, w, h = faces[0]

        # Expand bounding box
        expand_x = int(w * 0.15)
        expand_y = int(h * 0.20)

        height, width = image_rgb.shape[:2]
        x1 = max(0, x - expand_x)
        y1 = max(0, y - expand_y)
        x2 = min(width, x + w + expand_x)
        y2 = min(height, y + h + expand_y)

        return x1, y1, x2, y2

    except Exception as e:
        print(f"OpenCV face detection error: {e}")
        return None
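
# The unchanged lines elided here are not shown in this diff; among them,
# _load_face_parsing_model is called below. A minimal sketch of a cached loader
# matching that call site; the checkpoint name is an assumption, not confirmed
# by this diff:
#
#     @lru_cache(maxsize=1)
#     def _load_face_parsing_model():
#         model_name = "jonathandinu/face-parsing"  # assumed checkpoint
#         processor = AutoImageProcessor.from_pretrained(model_name)
#         model = AutoModelForSemanticSegmentation.from_pretrained(model_name)
#         model.eval()
#         id2label = {int(k): v for k, v in model.config.id2label.items()}
#         return processor, model, id2label, model_name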


def _segment_face_labels(image_rgb: np.ndarray) -> Tuple[np.ndarray, Dict[int, str]]:
    """Run face-parsing segmentation on an RGB crop. Returns (labels HxW int, id2label)."""
    processor, model, id2label, _ = _load_face_parsing_model()
    pil_img = Image.fromarray(image_rgb)

    # Resize if too large for the model
    max_size = 512
    if max(pil_img.size) > max_size:
        scale = max_size / max(pil_img.size)
        new_size = (int(pil_img.size[0] * scale), int(pil_img.size[1] * scale))
        pil_img = pil_img.resize(new_size, Image.Resampling.LANCZOS)

    inputs = processor(images=pil_img, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits

    # Upsample to original image size
    upsampled = torch.nn.functional.interpolate(
        logits,
        # ... unchanged remainder of the function elided by the diff ...


# ... head of _skin_indices_from_id2label(id2label: Dict[int, str]) -> List[int]
# elided by the diff; the hunk resumes inside its loop over the id2label entries ...
        name_l = name.lower()
        if "skin" in name_l:
            skin_indices.append(int(idx))
        elif "face" in name_l and "skin" not in name_l and "hair" not in name_l:
            skin_indices.append(int(idx))

    # Default fallback indices (common in face-parsing models)
    if not skin_indices:
        # Try common skin class indices
        common_skin_indices = [1, 13, 14, 15]  # These vary by model
        for idx in common_skin_indices:
            if idx in id2label:
                skin_indices.append(idx)

    return skin_indices


def _compute_skin_color_hex(image_rgb: np.ndarray, mask: np.ndarray) -> Tuple[str, np.ndarray]:
    """Compute a robust representative skin color as a hex string and return also the RGB color."""
    if mask is None or mask.size == 0:
        raise ValueError("Invalid mask for skin color computation")

    # boolean mask for indexing
    mask_bool = mask.astype(bool)
    if not np.any(mask_bool):
        raise ValueError("No skin pixels detected")

    skin_pixels = image_rgb[mask_bool]

    # Use median for robustness
    median_color = np.median(skin_pixels, axis=0)
    median_color = np.clip(median_color, 0, 255).astype(np.uint8)

    # Also compute mean for comparison
    mean_color = np.mean(skin_pixels, axis=0)
    mean_color = np.clip(mean_color, 0, 255).astype(np.uint8)

    # Use median as primary, but fall back to mean if the median's channels disagree strongly
    if np.std(median_color) > 100:
        color_rgb = mean_color
    else:
        color_rgb = median_color

    r, g, b = int(color_rgb[0]), int(color_rgb[1]), int(color_rgb[2])
    hex_code = f"#{r:02X}{g:02X}{b:02X}"
    return hex_code, color_rgb
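
# Worked example: a median RGB of (184, 143, 120) formats as "#B88F78".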


def _solid_color_image(color_rgb: np.ndarray, size: Tuple[int, int] = (160, 160)) -> np.ndarray:
    # ... unchanged body elided by the diff ...
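
# The body is elided as unchanged context. A one-line sketch of what such a
# swatch helper typically returns (an assumption, not the file's actual line):
#
#     return np.full((size[1], size[0], 3), color_rgb, dtype=np.uint8)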


def detect_skin_tone(image: np.ndarray) -> Tuple[str, np.ndarray, np.ndarray]:
    """Main pipeline: returns (hex_code, color_swatch_image, debug_mask_overlay)."""
    try:
        # Preprocess image
        rgb = _preprocess_image(image)
        height, width = rgb.shape[:2]

        # Create debug image
        debug_img = rgb.copy()

        # Try multiple face detection methods
        face_bbox = None
        detection_method = ""

        # Method 1: MediaPipe (primary)
        face_bbox = _detect_face_bbox_mediapipe(rgb)
        if face_bbox is not None:
            detection_method = "MediaPipe"

        # Method 2: OpenCV Haar Cascade (fallback)
        if face_bbox is None:
            face_bbox = _detect_face_bbox_opencv(rgb)
            if face_bbox is not None:
                detection_method = "OpenCV Haar"

        # Method 3: Central crop (last resort)
        if face_bbox is None:
            face_bbox = _central_crop_bbox(width, height, frac=0.5)
            detection_method = "Central Crop"
            print("Warning: using central crop as fallback")

        x1, y1, x2, y2 = face_bbox

        # Ensure bbox is valid and not too small
        if x2 <= x1 or y2 <= y1:
            raise ValueError("Invalid bounding box coordinates")

        if (x2 - x1) < 20 or (y2 - y1) < 20:
            raise ValueError("Face region too small")

        # Crop face region
        face_crop = rgb[y1:y2, x1:x2]

        if face_crop.size == 0:
            raise ValueError("Empty face crop")

        # Draw detection box on debug image
        color = (0, 255, 0) if detection_method != "Central Crop" else (255, 0, 0)
        cv2.rectangle(debug_img, (x1, y1), (x2, y2), color, 2)
        cv2.putText(debug_img, detection_method, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Face parsing segmentation to get skin mask
        try:
            labels, id2label = _segment_face_labels(face_crop)
            skin_indices = _skin_indices_from_id2label(id2label)

            if not skin_indices:
                # Create a simple central elliptical mask as fallback
                h, w = face_crop.shape[:2]
                skin_mask = np.zeros((h, w), dtype=np.uint8)
                center_y, center_x = h // 2, w // 2
                mask_size = min(h, w) // 3
                cv2.ellipse(skin_mask, (center_x, center_y),
                            (mask_size, mask_size // 2), 0, 0, 360, 255, -1)
            else:
                skin_mask = np.isin(labels, np.array(skin_indices, dtype=np.int32)).astype(np.uint8) * 255

            # Clean up the mask
            skin_mask = _binary_open_close(skin_mask, kernel_size=3, iterations=1)

        except Exception as e:
            print(f"Face parsing error: {e}")
            # Create a simple elliptical mask
            h, w = face_crop.shape[:2]
            skin_mask = np.zeros((h, w), dtype=np.uint8)
            center_y, center_x = h // 2, w // 2
            mask_size = min(h, w) // 3
            cv2.ellipse(skin_mask, (center_x, center_y),
                        (mask_size, mask_size // 2), 0, 0, 360, 255, -1)

        # Ensure we have some skin pixels
        if np.sum(skin_mask) == 0:
            # Use the entire face crop as a last resort
            skin_mask = np.ones((face_crop.shape[0], face_crop.shape[1]), dtype=np.uint8) * 255

        # Compute skin color
        hex_code, color_rgb = _compute_skin_color_hex(face_crop, skin_mask)

        # Prepare swatch
        swatch = _solid_color_image(color_rgb)

        # Place the crop's skin mask back into full-image coordinates for the debug view
        full_mask = np.zeros((height, width), dtype=np.uint8)
        full_mask[y1:y2, x1:x2] = skin_mask

        # Yellow overlay for the skin mask (matches the legend in the UI)
        color_mask = np.zeros_like(rgb)
        color_mask[:, :, 0] = 255  # Red channel
        color_mask[:, :, 1] = 255  # Green channel
        color_mask[:, :, 2] = 0    # Blue channel

        # Blend the overlay into the debug image so the mask is actually visible
        mask_3d = np.stack([full_mask] * 3, axis=2) / 255.0
        debug_img = (debug_img * (1 - mask_3d) + color_mask * mask_3d).astype(np.uint8)

        # Add hex code to debug image (white text with a thin dark outline)
        cv2.putText(debug_img, hex_code, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.putText(debug_img, hex_code, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 1)

        return hex_code, swatch, debug_img

    except Exception as e:
        error_msg = f"Error: {str(e)}"
        print(error_msg)
        # Return error state
        error_color = np.array([255, 0, 0], dtype=np.uint8)  # Red for error
        error_hex = "#FF0000"
        error_swatch = _solid_color_image(error_color)

        # Create error debug image
        if 'rgb' in locals():
            error_debug = rgb.copy()
        else:
            error_debug = np.zeros((300, 300, 3), dtype=np.uint8)
            error_debug[:] = [100, 100, 100]

        cv2.putText(error_debug, "ERROR", (50, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)  # red in RGB order
        cv2.putText(error_debug, error_msg[:30], (50, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

        return error_hex, error_swatch, error_debug
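
# _binary_open_close is not shown in this diff. A minimal sketch of such a
# morphological cleanup helper, matching the call site above (an assumption,
# not the file's actual lines):
#
#     def _binary_open_close(mask: np.ndarray, kernel_size: int = 3, iterations: int = 1) -> np.ndarray:
#         kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
#         opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=iterations)
#         return cv2.morphologyEx(opened, cv2.MORPH_CLOSE, kernel, iterations=iterations)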


def _hex_html(hex_code: str) -> str:
    style = (
        "display:flex;align-items:center;gap:12px;padding:8px 0;"
    )
    swatch_style = (
        f"width:24px;height:24px;border-radius:4px;background:{hex_code};"
        "border:2px solid #333;box-shadow:2px 2px 5px rgba(0,0,0,0.2);"
    )
    return (
        f"<div style='{style}'>"
        f"<div style='{swatch_style}'></div>"
        f"<span style='font-family:monospace;font-size:18px;font-weight:bold;'>{hex_code}</span>"
        "</div>"
    )


# Create Gradio interface
with gr.Blocks(title="Skin Tone Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🎨 Skin Tone Hex Detector

        Upload a photo with a face to detect the skin tone color. The app will return a HEX color code.

        ### How it works:
        1. Face detection using MediaPipe/OpenCV
        2. Skin region segmentation using AI
        3. Color extraction from skin pixels
        4. HEX code generation

        **Tip:** Use clear, well-lit frontal face photos for best results.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="📷 Upload Face Image",
                type="numpy",
                image_mode="RGB",
                height=400,
                sources=["upload", "webcam"],
                interactive=True
            )

            with gr.Row():
                run_btn = gr.Button("🔍 Detect Skin Tone", variant="primary", size="lg")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")

        with gr.Column(scale=1):
            with gr.Group():
                hex_output = gr.HTML(
                    label="🎨 Detected Color",
                    value="<div style='text-align:center;padding:20px;'>Upload an image to begin</div>"
                )
                swatch_output = gr.Image(
                    label="Color Swatch",
                    type="numpy",
                    height=200,
                    interactive=False
                )

            with gr.Accordion("🔍 Debug View", open=False):
                debug_output = gr.Image(
                    label="Detection Visualization",
                    type="numpy",
                    height=400,
                    interactive=False
                )

                gr.Markdown("""
                **Detection Legend:**
                - 🟢 Green box: Face detected (MediaPipe/OpenCV)
                - 🔴 Red box: Central crop (fallback)
                - 🟡 Yellow overlay: Skin mask
                """)

    gr.Markdown("""
    ### 📝 Notes:
    - Works best with frontal face photos in good lighting
    - Multiple detection methods improve reliability
    - Results may vary based on lighting and image quality
    - The HEX code represents the median skin color from detected regions
    """)

    def _run(image: Optional[np.ndarray]):
        if image is None:
            return (
                "<div style='text-align:center;padding:20px;color:#666;'>"
                "Please upload an image first</div>",
                np.zeros((200, 200, 3), dtype=np.uint8),
                np.zeros((400, 400, 3), dtype=np.uint8)
            )

        try:
            hex_code, swatch, debug = detect_skin_tone(image)
            return _hex_html(hex_code), swatch, debug
        except Exception as e:
            error_html = f"""
            <div style='text-align:center;padding:20px;color:#d00;'>
            <h3>❌ Error</h3>
            <p>{str(e)[:100]}...</p>
            <p>Please try a different image.</p>
            </div>
            """
            error_img = np.zeros((200, 200, 3), dtype=np.uint8)
            error_img[:] = [255, 200, 200]  # Light red
            return error_html, error_img, None

    def _clear():
        return (
            None,
            "<div style='text-align:center;padding:20px;color:#666;'>"
            "Upload an image to begin</div>",
            np.zeros((200, 200, 3), dtype=np.uint8),
            np.zeros((400, 400, 3), dtype=np.uint8)
        )

    # Connect events
    run_btn.click(
        fn=_run,
        inputs=[input_image],
        outputs=[hex_output, swatch_output, debug_output]
    )

    clear_btn.click(
        fn=_clear,
        inputs=[],
        outputs=[input_image, hex_output, swatch_output, debug_output]
    )


if __name__ == "__main__":
    # Print startup message
    print("=" * 60)
    print("🚀 Starting Skin Tone Detector")
    print("=" * 60)
    print("\nAccess the app at: http://localhost:7860")
    print("\nPress Ctrl+C to stop the server")

    # Launch with explicit settings
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False,
        show_error=True,
        quiet=False
    )
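
# Illustrative local usage outside the UI (an assumption, not part of the file):
#
#     img = np.array(Image.open("face.jpg").convert("RGB"))
#     hex_code, swatch, debug = detect_skin_tone(img)
#     print(hex_code)  # e.g. "#B88F78"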