---
tags:
- yolov8
- image-classification
- hand-detection
- computer-vision
library_name: ultralytics
---
# Hand Detection Model (YOLOv8)
This model classifies images into three categories:
- hand: Close-up hand with fingers visible (✋)
- arm: Forearm or elbow area (💪)
- not_hand: Neither hand nor arm (❌)
## Quick Start
```python
from ultralytics import YOLO

# Load the model directly from Hugging Face
model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

# Predict on an image
results = model.predict('image.jpg')

# Get the prediction
probs = results[0].probs
class_id = probs.top1              # 0=arm, 1=hand, 2=not_hand (alphabetical order!)
confidence = probs.top1conf.item()

# Interpret the result
if class_id == 1:    # hand is index 1
    print(f"✋ Hand detected: {confidence:.1%}")
elif class_id == 0:  # arm is index 0
    print(f"💪 Arm detected: {confidence:.1%}")
else:                # not_hand is index 2
    print(f"❌ No hand/arm detected: {confidence:.1%}")
```
## Live Demo (Webcam)
```python
import cv2
from ultralytics import YOLO

model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)
    probs = results[0].probs

    # YOLO orders classes alphabetically!
    classes = ['arm', 'hand', 'not_hand']  # 0=arm, 1=hand, 2=not_hand
    label = f"{classes[probs.top1]}: {probs.top1conf:.1%}"

    cv2.putText(frame, label, (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Hand Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```
## Use with Vercel AI SDK
```bash
npm install ai openai
```
```tsx
// app/components/hand-detector.tsx
'use client';

import { useChat } from 'ai/react';
import { useState } from 'react';

export function HandDetectorWithAI() {
  const [detection, setDetection] =
    useState<{ class: string; confidence: number } | null>(null);
  const { messages, append } = useChat({
    api: '/api/chat',
    initialMessages: [{
      id: 'system',
      role: 'system',
      content: 'You help interpret hand gestures and signs.'
    }]
  });

  const detectAndAnalyze = async (file: File) => {
    // 1. Detect the hand
    const formData = new FormData();
    formData.append('image', file);

    const response = await fetch('/api/detect-hand', {
      method: 'POST',
      body: formData
    });
    const result = await response.json();
    setDetection(result);

    // 2. If a hand was detected, ask the AI about the gesture
    if (result.class === 'hand') {
      await append({
        role: 'user',
        content: `What gesture is this hand making? Confidence: ${result.confidence}%`
      });
    }
  };

  return (
    <div>
      <input
        type="file"
        onChange={(e) => e.target.files && detectAndAnalyze(e.target.files[0])}
      />
      {detection && <p>Detected: {detection.class} ({detection.confidence}%)</p>}
      {messages.map((m) => (
        <div key={m.id}>{m.role}: {m.content}</div>
      ))}
    </div>
  );
}
```
```typescript
// app/api/chat/route.ts
import OpenAI from 'openai';
import { OpenAIStream, StreamingTextResponse } from 'ai';

const openai = new OpenAI();

export async function POST(req: Request) {
  const { messages } = await req.json();

  // Your OpenAI/AI provider logic here; a streaming OpenAI call shown as one option
  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    stream: true,
    messages,
  });

  const stream = OpenAIStream(response);
  return new StreamingTextResponse(stream);
}
```
## Use in Next.js/Node.js
### Option 1: FastAPI Backend + Next.js
**Backend (Python):**
```python
from fastapi import FastAPI, File, UploadFile
from ultralytics import YOLO
from PIL import Image
import io

app = FastAPI()
model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

@app.post("/detect")
async def detect(file: UploadFile = File(...)):
    image = Image.open(io.BytesIO(await file.read()))
    results = model.predict(image)
    probs = results[0].probs
    return {
        "class": ['arm', 'hand', 'not_hand'][probs.top1],  # alphabetical order
        "confidence": float(probs.top1conf)
    }
```
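Assuming the backend above is saved as `main.py`, it can be served locally with uvicorn (FastAPI also needs `python-multipart` to parse file uploads):

```bash
pip install fastapi uvicorn python-multipart
uvicorn main:app --reload --port 8000
```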
**Frontend (Next.js):**
```javascript
async function detectHand(imageFile) {
  const formData = new FormData();
  formData.append('file', imageFile);

  const response = await fetch('http://localhost:8000/detect', {
    method: 'POST',
    body: formData
  });
  const result = await response.json();
  console.log(`Detected: ${result.class} (${result.confidence * 100}%)`);
}
```
### Option 2: ONNX for Browser
```python
# Convert to ONNX first
from ultralytics import YOLO

model = YOLO('model.pt')
model.export(format='onnx')
```
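Before wiring the model into a browser, it can help to sanity-check the exported file with the `onnxruntime` Python package. A minimal sketch, assuming the export takes a 1×3×224×224 float32 input scaled to [0, 1] (the usual layout for YOLOv8 classification exports):

```python
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name

# Dummy image batch: 1 x 3 x 224 x 224, values in [0, 1]
x = np.random.rand(1, 3, 224, 224).astype(np.float32)

(scores,) = session.run(None, {input_name: x})
print(scores.shape)          # expected: (1, 3) - one score per class
print(int(scores.argmax()))  # predicted class index
```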
Then use with ONNX Runtime Web:
```javascript
import * as ort from 'onnxruntime-web';

const session = await ort.InferenceSession.create('/model.onnx');
// Process and run inference...
```
## React Native
```javascript
import { Alert } from 'react-native';

const detectHand = async (imageUri) => {
  const formData = new FormData();
  // Field name must match the backend parameter ('file' for the FastAPI example above)
  formData.append('file', {
    uri: imageUri,
    type: 'image/jpeg',
    name: 'photo.jpg'
  });

  const response = await fetch('YOUR_API_URL/detect', {
    method: 'POST',
    body: formData
  });
  const result = await response.json();
  Alert.alert(`Detected: ${result.class}`);
};
```
## cURL Test
```bash
curl -X POST -F "file=@test.jpg" http://localhost:8000/detect
```
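The same request from Python, using the `requests` package (an extra dependency, shown only for convenience):

```python
import requests

# Post a test image to the local FastAPI backend
with open('test.jpg', 'rb') as f:
    r = requests.post('http://localhost:8000/detect', files={'file': f})

print(r.json())  # e.g. {"class": "hand", "confidence": 0.98}
```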
## Model Details
- Architecture: YOLOv8s-cls (5M parameters)
- Classes: 3 (arm=0, hand=1, not_hand=2) - alphabetical order
- Input Size: 224x224
- Accuracy: >96% on validation set
- Size: ~3MB
## Training Data
- Total Images: 1,740
- Distribution:
  - Hand: 704 images (40%)
  - Arm: 320 images (18%)
  - Not Hand: 462 images (27%)
  - Val: 254 images (15%)
## Performance
| Metric | Value |
|---|---|
| Validation Accuracy | 96.3% |
| Inference Speed | 30+ FPS (Apple M1) |
| Model Size | 2.97 MB |
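To reproduce the inference-speed figure on your own hardware, a rough timing loop (throughput varies with device, backend, and image size):

```python
import time
from ultralytics import YOLO

model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

# Warm-up run so model loading/initialization doesn't skew the measurement
model.predict('image.jpg', verbose=False)

n = 50
start = time.perf_counter()
for _ in range(n):
    model.predict('image.jpg', verbose=False)
elapsed = time.perf_counter() - start
print(f"{n / elapsed:.1f} FPS")
```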
## License
MIT - Free for commercial use
## Citation
If you use this model, please cite:
```bibtex
@software{hand_detection_yolo_2024,
  author = {EtanHey},
  title = {Hand Detection YOLOv8 Model},
  year = {2024},
  publisher = {HuggingFace},
  url = {https://huggingface.co/EtanHey/hand-detection-3class}
}
```