# Commit e4bcd7e (EtanHey) — Fix: Remove example images to prevent FileNotFoundError
"""
HuggingFace Spaces App for Hand/Arm Detection
Provides both Gradio UI and API endpoints
Model: https://huggingface.co/EtanHey/hand-sign-detection
"""
import gradio as gr
from ultralytics import YOLO
import numpy as np
from PIL import Image
import json
from typing import Dict, Tuple, Any
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load the model
# Remote weights on the HuggingFace Hub; ultralytics downloads them on first use.
MODEL_PATH = "https://huggingface.co/EtanHey/hand-sign-detection/resolve/main/model.pt"
# Populated by load_model() at startup; stays None if the download/load fails.
model = None
def load_model():
    """Download the YOLO weights from HuggingFace and bind them to the module-global `model`.

    Returns:
        bool: True when the model loaded, False when loading raised.
    """
    global model
    try:
        logger.info(f"Loading model from {MODEL_PATH}")
        model = YOLO(MODEL_PATH)
    except Exception as e:
        # Log and degrade gracefully: callers check `model is None` before use.
        logger.error(f"❌ Failed to load model: {e}")
        return False
    logger.info("βœ… Model loaded successfully!")
    return True
# Load model on startup
# (import-time side effect so the first request doesn't pay the download cost)
load_model()
# Class names (alphabetical order as YOLO expects)
# NOTE: positional order matters — probabilities are read out by index 0/1/2.
CLASS_NAMES = ['arm', 'hand', 'not_hand']
# Human-friendly display labels keyed by class name, shown in the UI.
CLASS_LABELS = {
'arm': 'πŸ’ͺ Arm',
'hand': 'βœ‹ Hand',
'not_hand': '❌ Not Hand/Arm'
}
def process_image(image: Image.Image) -> Dict[str, Any]:
    """Classify a PIL image as hand / arm / neither and return a JSON-safe dict.

    Args:
        image: Input image in any PIL mode (converted to RGB before inference).

    Returns:
        Dict with keys "class", "confidence", "probabilities" and, on success,
        a display "label". Failure paths add an "error" key instead of raising.
    """
    if model is None:
        # Startup load failed; report instead of crashing the request.
        return {
            "error": "Model not loaded",
            "class": "unknown",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }
    try:
        # Convert PIL image to RGB if needed (YOLO expects 3-channel input)
        if image.mode != 'RGB':
            image = image.convert('RGB')
        # Run inference
        results = model.predict(image, verbose=False)
        # BUGFIX: `probs` is None for non-classification models; the previous
        # `not results[0].probs` invoked truthiness on a tensor wrapper, which
        # can raise for a multi-element tensor. Compare against None explicitly.
        if not results or results[0].probs is None:
            return {
                "class": "not_hand",
                "confidence": 0.0,
                "probabilities": {"hand": 0, "arm": 0, "not_hand": 1.0}
            }
        # Extract probabilities
        probs = results[0].probs
        top_class_idx = probs.top1
        top_confidence = float(probs.top1conf)
        # Indices follow CLASS_NAMES alphabetical order: 0=arm, 1=hand, 2=not_hand.
        probabilities = {
            "hand": float(probs.data[1]),
            "arm": float(probs.data[0]),
            "not_hand": float(probs.data[2])
        }
        return {
            "class": CLASS_NAMES[top_class_idx],
            "confidence": top_confidence,
            "probabilities": probabilities,
            "label": CLASS_LABELS[CLASS_NAMES[top_class_idx]]
        }
    except Exception as e:
        # Last-resort guard: surface the message, keep the response shape stable.
        logger.error(f"Error processing image: {e}")
        return {
            "error": str(e),
            "class": "error",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }
def gradio_predict(image: Image.Image) -> Tuple[str, Dict, str]:
    """Adapter for the Gradio UI: one image in, (markdown, label scores, JSON) out."""
    if image is None:
        return "Please upload an image", {}, ""
    detection = process_image(image)
    if "error" in detection:
        return f"Error: {detection['error']}", {}, ""
    probs = detection["probabilities"]
    # Scores for the gr.Label confidence-bar widget.
    score_map = {
        "βœ‹ Hand": probs["hand"],
        "πŸ’ͺ Arm": probs["arm"],
        "❌ Neither": probs["not_hand"]
    }
    main_label = detection["label"]
    confidence = detection["confidence"]
    # Human-readable markdown summary for the main output panel.
    output_text = f"""
## Detection Result
**Detected:** {main_label}
**Confidence:** {confidence:.1%}
### Detailed Probabilities:
- Hand: {probs['hand']:.1%}
- Arm: {probs['arm']:.1%}
- Not Hand/Arm: {probs['not_hand']:.1%}
### Understanding the Classes:
- **Hand**: Close-up view with fingers visible
- **Arm**: Forearm or elbow area without fingers
- **Not Hand/Arm**: Neither hand nor arm detected
"""
    # Raw result as pretty-printed JSON for developers.
    return output_text, score_map, json.dumps(detection, indent=2)
# API prediction function for Gradio's built-in API
def api_predict(image: Image.Image) -> Dict[str, Any]:
    """Return the raw detection dict for programmatic (non-UI) callers."""
    if image is None:
        return {"error": "No image provided"}
    # Delegate entirely to the shared inference path.
    return process_image(image)
# Gradio Interface
def create_gradio_interface():
    """Build and return the main gr.Interface for the web UI.

    No examples are attached (deliberate — example image files are not
    shipped with the Space and would raise FileNotFoundError).
    """
    # Custom CSS for better styling
    custom_css = """
    .gradio-container {
        font-family: 'Inter', sans-serif;
    }
    .output-class {
        font-size: 24px;
        font-weight: bold;
    }
    """
    # Create interface (no examples to avoid file errors)
    interface = gr.Interface(
        fn=gradio_predict,
        inputs=[
            gr.Image(
                type="pil",
                label="Upload Image",
                sources=["upload", "webcam", "clipboard"]
            )
        ],
        outputs=[
            gr.Markdown(label="Detection Result"),
            gr.Label(label="Confidence Scores", num_top_classes=3),
            gr.JSON(label="API Response (for developers)")
        ],
        title="🀚 Hand/Arm Detection AI",
        description="""
Upload an image or use your webcam to detect hands and arms.
**Model:** YOLOv8 trained on 1,740 images | **Accuracy:** 96.3%
**API Access:** Use Gradio's built-in API endpoints for programmatic access.
""",
        article="""
### About
This model distinguishes between:
- **Hands**: Close-up views with visible fingers
- **Arms**: Forearm/elbow areas without fingers
- **Neither**: Images without hands or arms
### API Usage
```python
from gradio_client import Client
# Connect to the API
client = Client("EtanHey/hand-detection-api")
# Make prediction
result = client.predict(
    image="path/to/your/image.jpg",
    api_name="/predict"
)
print(result)
```
### Model Card
View the full model details at [HuggingFace Model Hub](https://huggingface.co/EtanHey/hand-sign-detection)
""",
        css=custom_css,
        theme=gr.themes.Soft()
    )
    return interface
# Create and launch Gradio interface
if __name__ == "__main__":
    # Rich web UI tab.
    web_ui = create_gradio_interface()
    # Minimal JSON-in/JSON-out tab for programmatic access.
    raw_api = gr.Interface(
        fn=api_predict,
        inputs=gr.Image(type="pil"),
        outputs=gr.JSON(),
        title="Hand Detection API"
    )
    # Present both under one tabbed app.
    demo = gr.TabbedInterface(
        [web_ui, raw_api],
        ["Web Interface", "API"],
        title="🀚 Hand/Arm Detection AI"
    )
    # 0.0.0.0:7860 is the HuggingFace Spaces serving convention.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)