# Commit e4bcd7e (EtanHey) — Fix: Remove example images to prevent FileNotFoundError
"""
HuggingFace Spaces App for Hand/Arm Detection
Provides both Gradio UI and API endpoints
Model: https://huggingface.co/EtanHey/hand-sign-detection
"""
import gradio as gr
from ultralytics import YOLO
import numpy as np
from PIL import Image
import json
from typing import Dict, Tuple, Any
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load the model
# Remote weights on the HuggingFace Hub; ultralytics downloads them on first use.
MODEL_PATH = "https://huggingface.co/EtanHey/hand-sign-detection/resolve/main/model.pt"
# Populated by load_model() at startup; stays None if the download/load fails.
model = None
def load_model():
    """Download the YOLO weights from HuggingFace and bind them to the module-global `model`.

    Returns:
        bool: True when the model loaded, False when loading raised.
    """
    global model
    try:
        logger.info(f"Loading model from {MODEL_PATH}")
        model = YOLO(MODEL_PATH)
    except Exception as e:
        # Log and degrade gracefully: callers check `model is None` before use.
        logger.error(f"❌ Failed to load model: {e}")
        return False
    logger.info("βœ… Model loaded successfully!")
    return True
# Load model on startup
# (import-time side effect so the first request doesn't pay the download cost)
load_model()
# Class names (alphabetical order as YOLO expects)
# NOTE: positional order matters — probabilities are read out by index 0/1/2.
CLASS_NAMES = ['arm', 'hand', 'not_hand']
# Human-friendly display labels keyed by class name, shown in the UI.
CLASS_LABELS = {
'arm': 'πŸ’ͺ Arm',
'hand': 'βœ‹ Hand',
'not_hand': '❌ Not Hand/Arm'
}
def process_image(image: Image.Image) -> Dict[str, Any]:
    """Classify a PIL image as hand / arm / neither and return a JSON-safe dict.

    Args:
        image: Input image in any PIL mode (converted to RGB before inference).

    Returns:
        Dict with keys "class", "confidence", "probabilities" and, on success,
        a display "label". Failure paths add an "error" key instead of raising.
    """
    if model is None:
        # Startup load failed; report instead of crashing the request.
        return {
            "error": "Model not loaded",
            "class": "unknown",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }
    try:
        # Convert PIL image to RGB if needed (YOLO expects 3-channel input)
        if image.mode != 'RGB':
            image = image.convert('RGB')
        # Run inference
        results = model.predict(image, verbose=False)
        # BUGFIX: `probs` is None for non-classification models; the previous
        # `not results[0].probs` invoked truthiness on a tensor wrapper, which
        # can raise for a multi-element tensor. Compare against None explicitly.
        if not results or results[0].probs is None:
            return {
                "class": "not_hand",
                "confidence": 0.0,
                "probabilities": {"hand": 0, "arm": 0, "not_hand": 1.0}
            }
        # Extract probabilities
        probs = results[0].probs
        top_class_idx = probs.top1
        top_confidence = float(probs.top1conf)
        # Indices follow CLASS_NAMES alphabetical order: 0=arm, 1=hand, 2=not_hand.
        probabilities = {
            "hand": float(probs.data[1]),
            "arm": float(probs.data[0]),
            "not_hand": float(probs.data[2])
        }
        return {
            "class": CLASS_NAMES[top_class_idx],
            "confidence": top_confidence,
            "probabilities": probabilities,
            "label": CLASS_LABELS[CLASS_NAMES[top_class_idx]]
        }
    except Exception as e:
        # Last-resort guard: surface the message, keep the response shape stable.
        logger.error(f"Error processing image: {e}")
        return {
            "error": str(e),
            "class": "error",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }
def gradio_predict(image: Image.Image) -> Tuple[str, Dict, str]:
    """Adapter for the Gradio UI: one image in, (markdown, label scores, JSON) out."""
    if image is None:
        return "Please upload an image", {}, ""
    detection = process_image(image)
    if "error" in detection:
        return f"Error: {detection['error']}", {}, ""
    probs = detection["probabilities"]
    # Scores for the gr.Label confidence-bar widget.
    score_map = {
        "βœ‹ Hand": probs["hand"],
        "πŸ’ͺ Arm": probs["arm"],
        "❌ Neither": probs["not_hand"]
    }
    main_label = detection["label"]
    confidence = detection["confidence"]
    # Human-readable markdown summary for the main output panel.
    output_text = f"""
## Detection Result
**Detected:** {main_label}
**Confidence:** {confidence:.1%}
### Detailed Probabilities:
- Hand: {probs['hand']:.1%}
- Arm: {probs['arm']:.1%}
- Not Hand/Arm: {probs['not_hand']:.1%}
### Understanding the Classes:
- **Hand**: Close-up view with fingers visible
- **Arm**: Forearm or elbow area without fingers
- **Not Hand/Arm**: Neither hand nor arm detected
"""
    # Raw result as pretty-printed JSON for developers.
    return output_text, score_map, json.dumps(detection, indent=2)
# API prediction function for Gradio's built-in API
def api_predict(image: Image.Image) -> Dict[str, Any]:
    """Return the raw detection dict for programmatic (non-UI) callers."""
    if image is None:
        return {"error": "No image provided"}
    # Delegate entirely to the shared inference path.
    return process_image(image)
# Gradio Interface
def create_gradio_interface():
    """Build and return the main gr.Interface for the web UI.

    No examples are attached (deliberate — example image files are not
    shipped with the Space and would raise FileNotFoundError).
    """
    # Custom CSS for better styling
    custom_css = """
    .gradio-container {
        font-family: 'Inter', sans-serif;
    }
    .output-class {
        font-size: 24px;
        font-weight: bold;
    }
    """
    # Create interface (no examples to avoid file errors)
    interface = gr.Interface(
        fn=gradio_predict,
        inputs=[
            gr.Image(
                type="pil",
                label="Upload Image",
                sources=["upload", "webcam", "clipboard"]
            )
        ],
        outputs=[
            gr.Markdown(label="Detection Result"),
            gr.Label(label="Confidence Scores", num_top_classes=3),
            gr.JSON(label="API Response (for developers)")
        ],
        title="🀚 Hand/Arm Detection AI",
        description="""
Upload an image or use your webcam to detect hands and arms.
**Model:** YOLOv8 trained on 1,740 images | **Accuracy:** 96.3%
**API Access:** Use Gradio's built-in API endpoints for programmatic access.
""",
        article="""
### About
This model distinguishes between:
- **Hands**: Close-up views with visible fingers
- **Arms**: Forearm/elbow areas without fingers
- **Neither**: Images without hands or arms
### API Usage
```python
from gradio_client import Client
# Connect to the API
client = Client("EtanHey/hand-detection-api")
# Make prediction
result = client.predict(
    image="path/to/your/image.jpg",
    api_name="/predict"
)
print(result)
```
### Model Card
View the full model details at [HuggingFace Model Hub](https://huggingface.co/EtanHey/hand-sign-detection)
""",
        css=custom_css,
        theme=gr.themes.Soft()
    )
    return interface
# Create and launch Gradio interface
if __name__ == "__main__":
    # Rich web UI tab.
    web_ui = create_gradio_interface()
    # Minimal JSON-in/JSON-out tab for programmatic access.
    raw_api = gr.Interface(
        fn=api_predict,
        inputs=gr.Image(type="pil"),
        outputs=gr.JSON(),
        title="Hand Detection API"
    )
    # Present both under one tabbed app.
    demo = gr.TabbedInterface(
        [web_ui, raw_api],
        ["Web Interface", "API"],
        title="🀚 Hand/Arm Detection AI"
    )
    # 0.0.0.0:7860 is the HuggingFace Spaces serving convention.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)