File size: 7,019 Bytes
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6722694
 
 
 
 
777c63c
6722694
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4bcd7e
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6722694
777c63c
 
 
 
 
 
 
 
 
 
6722694
777c63c
6722694
 
 
 
 
 
 
777c63c
6722694
777c63c
 
 
 
 
 
 
 
 
 
 
 
 
6722694
777c63c
6722694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
777c63c
6722694
 
777c63c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
"""
HuggingFace Spaces App for Hand/Arm Detection
Provides both Gradio UI and API endpoints
Model: https://huggingface.co/EtanHey/hand-sign-detection
"""

import gradio as gr
from ultralytics import YOLO
import numpy as np
from PIL import Image
import json
from typing import Dict, Tuple, Any
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the model
# Direct-download URL for the classifier weights on the HuggingFace Hub.
MODEL_PATH = "https://huggingface.co/EtanHey/hand-sign-detection/resolve/main/model.pt"
# Populated by load_model() at startup; stays None if loading fails.
model = None

def load_model():
    """Download and initialize the YOLO model from the HuggingFace Hub.

    Stores the loaded model in the module-level ``model`` global.

    Returns:
        bool: True when the model loaded successfully, False otherwise.
    """
    global model
    try:
        logger.info(f"Loading model from {MODEL_PATH}")
        model = YOLO(MODEL_PATH)
    except Exception as e:
        logger.error(f"❌ Failed to load model: {e}")
        return False
    else:
        logger.info("βœ… Model loaded successfully!")
        return True

# Load model on startup
load_model()  # best-effort: process_image() reports "Model not loaded" if this failed

# Class names (alphabetical order as YOLO expects)
CLASS_NAMES = ['arm', 'hand', 'not_hand']
# Human-friendly display labels keyed by class name; used for UI output.
CLASS_LABELS = {
    'arm': 'πŸ’ͺ Arm',
    'hand': 'βœ‹ Hand',
    'not_hand': '❌ Not Hand/Arm'
}

def process_image(image: Image.Image) -> Dict[str, Any]:
    """Classify *image* as hand / arm / neither and return a JSON-safe dict.

    Args:
        image: Input PIL image; converted to RGB before inference.

    Returns:
        Dict with "class", "confidence", "probabilities" and "label" keys.
        An "error" key is added when the model is unavailable or inference
        raises; in that case "label" may be absent.
    """
    if model is None:
        return {
            "error": "Model not loaded",
            "class": "unknown",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }

    try:
        # YOLO expects 3-channel RGB input
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Run inference
        results = model.predict(image, verbose=False)

        # Explicit None check: `probs` wraps a tensor, so relying on its
        # truthiness is fragile; it is None for non-classification models.
        if not results or results[0].probs is None:
            return {
                "class": "not_hand",
                "confidence": 0.0,
                "probabilities": {"hand": 0, "arm": 0, "not_hand": 1.0},
                # BUG FIX: include "label" so consumers that read
                # result["label"] (e.g. the Gradio callback) don't KeyError.
                "label": CLASS_LABELS["not_hand"]
            }

        # Extract the top prediction
        probs = results[0].probs
        top_class_idx = probs.top1
        top_confidence = float(probs.top1conf)

        # Derive index mapping from CLASS_NAMES instead of hard-coding the
        # positions, so the two cannot silently drift apart.
        index_of = {name: i for i, name in enumerate(CLASS_NAMES)}
        probabilities = {
            "hand": float(probs.data[index_of["hand"]]),
            "arm": float(probs.data[index_of["arm"]]),
            "not_hand": float(probs.data[index_of["not_hand"]])
        }

        return {
            "class": CLASS_NAMES[top_class_idx],
            "confidence": top_confidence,
            "probabilities": probabilities,
            "label": CLASS_LABELS[CLASS_NAMES[top_class_idx]]
        }

    except Exception as e:
        logger.error(f"Error processing image: {e}")
        return {
            "error": str(e),
            "class": "error",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }

def gradio_predict(image: Image.Image) -> Tuple[str, Dict, str]:
    """Gradio UI callback: format process_image() output for display.

    Args:
        image: Uploaded/webcam PIL image, or None when nothing was provided.

    Returns:
        Tuple of (markdown summary, {label: score} dict for gr.Label,
        pretty-printed JSON string for developers).
    """
    if image is None:
        return "Please upload an image", {}, ""

    # Process the image
    result = process_image(image)

    # Format output
    if "error" in result:
        return f"Error: {result['error']}", {}, ""

    # Create confidence bars
    confidence_scores = {
        "βœ‹ Hand": result["probabilities"]["hand"],
        "πŸ’ͺ Arm": result["probabilities"]["arm"],
        "❌ Neither": result["probabilities"]["not_hand"]
    }

    # BUG FIX: the no-probs fallback dict from process_image() can lack a
    # "label" key, which used to raise KeyError here; fall back to the raw
    # class name so the UI still renders.
    main_label = result.get("label", result["class"])
    confidence = result["confidence"]

    output_text = f"""
    ## Detection Result

    **Detected:** {main_label}
    **Confidence:** {confidence:.1%}

    ### Detailed Probabilities:
    - Hand: {result['probabilities']['hand']:.1%}
    - Arm: {result['probabilities']['arm']:.1%}
    - Not Hand/Arm: {result['probabilities']['not_hand']:.1%}

    ### Understanding the Classes:
    - **Hand**: Close-up view with fingers visible
    - **Arm**: Forearm or elbow area without fingers
    - **Not Hand/Arm**: Neither hand nor arm detected
    """

    # Create JSON output for developers
    json_output = json.dumps(result, indent=2)

    return output_text, confidence_scores, json_output

# API prediction function for Gradio's built-in API
def api_predict(image: Image.Image) -> Dict[str, Any]:
    """Raw API endpoint: return the unformatted detection dict for *image*."""
    if image is None:
        return {"error": "No image provided"}
    # Delegate all model work to the shared processing helper.
    return process_image(image)

# Gradio Interface
def create_gradio_interface():
    """Assemble and return the interactive Gradio Interface for the demo."""

    # Custom CSS for better styling
    custom_css = """
    .gradio-container {
        font-family: 'Inter', sans-serif;
    }
    .output-class {
        font-size: 24px;
        font-weight: bold;
    }
    """

    # Build the input/output components up-front for readability.
    image_input = gr.Image(
        type="pil",
        label="Upload Image",
        sources=["upload", "webcam", "clipboard"]
    )
    result_outputs = [
        gr.Markdown(label="Detection Result"),
        gr.Label(label="Confidence Scores", num_top_classes=3),
        gr.JSON(label="API Response (for developers)")
    ]

    description_md = """
        Upload an image or use your webcam to detect hands and arms.

        **Model:** YOLOv8 trained on 1,740 images | **Accuracy:** 96.3%

        **API Access:** Use Gradio's built-in API endpoints for programmatic access.
        """
    article_md = """
        ### About
        This model distinguishes between:
        - **Hands**: Close-up views with visible fingers
        - **Arms**: Forearm/elbow areas without fingers
        - **Neither**: Images without hands or arms

        ### API Usage
        ```python
        from gradio_client import Client

        # Connect to the API
        client = Client("https://huggingface.co/spaces/EtanHey/hand-detection-api")

        # Make prediction
        result = client.predict(
            image="path/to/your/image.jpg",
            api_name="/predict"
        )
        print(result)
        ```

        ### Model Card
        View the full model details at [HuggingFace Model Hub](https://huggingface.co/EtanHey/hand-sign-detection)
        """

    # No example images are bundled (avoids missing-file errors on Spaces).
    return gr.Interface(
        fn=gradio_predict,
        inputs=[image_input],
        outputs=result_outputs,
        title="🀚 Hand/Arm Detection AI",
        description=description_md,
        article=article_md,
        css=custom_css,
        theme=gr.themes.Soft()
    )

# Create and launch Gradio interface
if __name__ == "__main__":
    # Main interactive UI
    web_ui = create_gradio_interface()

    # Minimal secondary interface exposing the raw JSON API
    api_ui = gr.Interface(
        fn=api_predict,
        inputs=gr.Image(type="pil"),
        outputs=gr.JSON(),
        title="Hand Detection API"
    )

    # Present both as tabs in a single app
    demo = gr.TabbedInterface(
        [web_ui, api_ui],
        ["Web Interface", "API"],
        title="🀚 Hand/Arm Detection AI"
    )

    # Launch on default HuggingFace Spaces port (7860)
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)