File size: 7,019 Bytes
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6722694
 
 
 
 
777c63c
6722694
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4bcd7e
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6722694
777c63c
 
 
 
 
 
 
 
 
 
6722694
777c63c
6722694
 
 
 
 
 
 
777c63c
6722694
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
6722694
777c63c
6722694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
777c63c
6722694
 
777c63c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
"""
HuggingFace Spaces App for Hand/Arm Detection
Provides both Gradio UI and API endpoints
Model: https://huggingface.co/EtanHey/hand-sign-detection
"""

import gradio as gr
from ultralytics import YOLO
import numpy as np
from PIL import Image
import json
from typing import Dict, Tuple, Any
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the model
# Direct-download URL for the classifier weights on the HuggingFace Hub.
MODEL_PATH = "https://huggingface.co/EtanHey/hand-sign-detection/resolve/main/model.pt"
# Populated by load_model() at startup; stays None if loading fails.
model = None

def load_model():
    """Download and initialize the YOLO model from the HuggingFace Hub.

    Stores the loaded model in the module-level ``model`` global.

    Returns:
        bool: True when the model loaded successfully, False otherwise.
    """
    global model
    try:
        logger.info(f"Loading model from {MODEL_PATH}")
        model = YOLO(MODEL_PATH)
    except Exception as e:
        logger.error(f"❌ Failed to load model: {e}")
        return False
    else:
        logger.info("βœ… Model loaded successfully!")
        return True

# Load model on startup
load_model()  # best-effort: process_image() reports "Model not loaded" if this failed

# Class names (alphabetical order as YOLO expects)
CLASS_NAMES = ['arm', 'hand', 'not_hand']
# Human-friendly display labels keyed by class name; used for UI output.
CLASS_LABELS = {
    'arm': 'πŸ’ͺ Arm',
    'hand': 'βœ‹ Hand',
    'not_hand': '❌ Not Hand/Arm'
}

def process_image(image: Image.Image) -> Dict[str, Any]:
    """Classify *image* as hand / arm / neither and return a JSON-safe dict.

    Args:
        image: Input PIL image; converted to RGB before inference.

    Returns:
        Dict with "class", "confidence", "probabilities" and "label" keys.
        An "error" key is added when the model is unavailable or inference
        raises; in that case "label" may be absent.
    """
    if model is None:
        return {
            "error": "Model not loaded",
            "class": "unknown",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }

    try:
        # YOLO expects 3-channel RGB input
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Run inference
        results = model.predict(image, verbose=False)

        # Explicit None check: `probs` wraps a tensor, so relying on its
        # truthiness is fragile; it is None for non-classification models.
        if not results or results[0].probs is None:
            return {
                "class": "not_hand",
                "confidence": 0.0,
                "probabilities": {"hand": 0, "arm": 0, "not_hand": 1.0},
                # BUG FIX: include "label" so consumers that read
                # result["label"] (e.g. the Gradio callback) don't KeyError.
                "label": CLASS_LABELS["not_hand"]
            }

        # Extract the top prediction
        probs = results[0].probs
        top_class_idx = probs.top1
        top_confidence = float(probs.top1conf)

        # Derive index mapping from CLASS_NAMES instead of hard-coding the
        # positions, so the two cannot silently drift apart.
        index_of = {name: i for i, name in enumerate(CLASS_NAMES)}
        probabilities = {
            "hand": float(probs.data[index_of["hand"]]),
            "arm": float(probs.data[index_of["arm"]]),
            "not_hand": float(probs.data[index_of["not_hand"]])
        }

        return {
            "class": CLASS_NAMES[top_class_idx],
            "confidence": top_confidence,
            "probabilities": probabilities,
            "label": CLASS_LABELS[CLASS_NAMES[top_class_idx]]
        }

    except Exception as e:
        logger.error(f"Error processing image: {e}")
        return {
            "error": str(e),
            "class": "error",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }

def gradio_predict(image: Image.Image) -> Tuple[str, Dict, str]:
    """Gradio UI callback: format process_image() output for display.

    Args:
        image: Uploaded/webcam PIL image, or None when nothing was provided.

    Returns:
        Tuple of (markdown summary, {label: score} dict for gr.Label,
        pretty-printed JSON string for developers).
    """
    if image is None:
        return "Please upload an image", {}, ""

    # Process the image
    result = process_image(image)

    # Format output
    if "error" in result:
        return f"Error: {result['error']}", {}, ""

    # Create confidence bars
    confidence_scores = {
        "βœ‹ Hand": result["probabilities"]["hand"],
        "πŸ’ͺ Arm": result["probabilities"]["arm"],
        "❌ Neither": result["probabilities"]["not_hand"]
    }

    # BUG FIX: the no-probs fallback dict from process_image() can lack a
    # "label" key, which used to raise KeyError here; fall back to the raw
    # class name so the UI still renders.
    main_label = result.get("label", result["class"])
    confidence = result["confidence"]

    output_text = f"""
    ## Detection Result

    **Detected:** {main_label}
    **Confidence:** {confidence:.1%}

    ### Detailed Probabilities:
    - Hand: {result['probabilities']['hand']:.1%}
    - Arm: {result['probabilities']['arm']:.1%}
    - Not Hand/Arm: {result['probabilities']['not_hand']:.1%}

    ### Understanding the Classes:
    - **Hand**: Close-up view with fingers visible
    - **Arm**: Forearm or elbow area without fingers
    - **Not Hand/Arm**: Neither hand nor arm detected
    """

    # Create JSON output for developers
    json_output = json.dumps(result, indent=2)

    return output_text, confidence_scores, json_output

# API prediction function for Gradio's built-in API
def api_predict(image: Image.Image) -> Dict[str, Any]:
    """Raw API endpoint: return the unformatted detection dict for *image*."""
    if image is None:
        return {"error": "No image provided"}
    # Delegate all model work to the shared processing helper.
    return process_image(image)

# Gradio Interface
def create_gradio_interface():
    """Assemble and return the interactive Gradio Interface for the demo."""

    # Custom CSS for better styling
    custom_css = """
    .gradio-container {
        font-family: 'Inter', sans-serif;
    }
    .output-class {
        font-size: 24px;
        font-weight: bold;
    }
    """

    # Build the input/output components up-front for readability.
    image_input = gr.Image(
        type="pil",
        label="Upload Image",
        sources=["upload", "webcam", "clipboard"]
    )
    result_outputs = [
        gr.Markdown(label="Detection Result"),
        gr.Label(label="Confidence Scores", num_top_classes=3),
        gr.JSON(label="API Response (for developers)")
    ]

    description_md = """
        Upload an image or use your webcam to detect hands and arms.

        **Model:** YOLOv8 trained on 1,740 images | **Accuracy:** 96.3%

        **API Access:** Use Gradio's built-in API endpoints for programmatic access.
        """
    article_md = """
        ### About
        This model distinguishes between:
        - **Hands**: Close-up views with visible fingers
        - **Arms**: Forearm/elbow areas without fingers
        - **Neither**: Images without hands or arms

        ### API Usage
        ```python
        from gradio_client import Client

        # Connect to the API
        client = Client("https://huggingface.co/spaces/EtanHey/hand-detection-api")

        # Make prediction
        result = client.predict(
            image="path/to/your/image.jpg",
            api_name="/predict"
        )
        print(result)
        ```

        ### Model Card
        View the full model details at [HuggingFace Model Hub](https://huggingface.co/EtanHey/hand-sign-detection)
        """

    # No example images are bundled (avoids missing-file errors on Spaces).
    return gr.Interface(
        fn=gradio_predict,
        inputs=[image_input],
        outputs=result_outputs,
        title="🀚 Hand/Arm Detection AI",
        description=description_md,
        article=article_md,
        css=custom_css,
        theme=gr.themes.Soft()
    )

# Create and launch Gradio interface
if __name__ == "__main__":
    # Main interactive UI
    web_ui = create_gradio_interface()

    # Minimal secondary interface exposing the raw JSON API
    api_ui = gr.Interface(
        fn=api_predict,
        inputs=gr.Image(type="pil"),
        outputs=gr.JSON(),
        title="Hand Detection API"
    )

    # Present both as tabs in a single app
    demo = gr.TabbedInterface(
        [web_ui, api_ui],
        ["Web Interface", "API"],
        title="🀚 Hand/Arm Detection AI"
    )

    # Launch on default HuggingFace Spaces port (7860)
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)