---
tags:
- yolov8
- image-classification
- hand-detection
- computer-vision
library_name: ultralytics
---
# Hand Detection Model (YOLOv8)
This model classifies images into three categories:
- hand: Close-up hand with fingers visible (✋)
- arm: Forearm or elbow area (💪)
- not_hand: Neither hand nor arm (❌)
## Quick Start
```python
from ultralytics import YOLO

# Load the model directly from Hugging Face
model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

# Predict on an image
results = model.predict('image.jpg')

# Get the prediction
probs = results[0].probs
class_id = probs.top1              # 0=arm, 1=hand, 2=not_hand (alphabetical order!)
confidence = probs.top1conf.item()

# Interpret the result
if class_id == 1:    # hand is index 1
    print(f"✋ Hand detected: {confidence:.1%}")
elif class_id == 0:  # arm is index 0
    print(f"💪 Arm detected: {confidence:.1%}")
else:                # not_hand is index 2
    print(f"❌ No hand/arm detected: {confidence:.1%}")
```
## Live Demo (Webcam)
```python
import cv2
from ultralytics import YOLO

model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)
    probs = results[0].probs

    # YOLO orders classes alphabetically!
    classes = ['arm', 'hand', 'not_hand']  # 0=arm, 1=hand, 2=not_hand
    label = f"{classes[probs.top1]}: {probs.top1conf:.1%}"

    cv2.putText(frame, label, (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Hand Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```
## Use with Vercel AI SDK
```bash
npm install ai openai
```
```tsx
// app/components/hand-detector.tsx
'use client';

import { useChat } from 'ai/react';
import { useState } from 'react';

export function HandDetectorWithAI() {
  const [detection, setDetection] =
    useState<{ class: string; confidence: number } | null>(null);
  const { messages, append } = useChat({
    api: '/api/chat',
    initialMessages: [{
      id: 'system',
      role: 'system',
      content: 'You help interpret hand gestures and signs.'
    }]
  });

  const detectAndAnalyze = async (file: File) => {
    // 1. Detect the hand
    const formData = new FormData();
    formData.append('image', file);

    const response = await fetch('/api/detect-hand', {
      method: 'POST',
      body: formData
    });
    const result = await response.json();
    setDetection(result);

    // 2. If a hand was detected, ask the AI about the gesture
    if (result.class === 'hand') {
      await append({
        role: 'user',
        content: `What gesture is this hand making? Confidence: ${result.confidence}%`
      });
    }
  };

  return (
    <div>
      <input
        type="file"
        onChange={(e) => e.target.files && detectAndAnalyze(e.target.files[0])}
      />
      {detection && <p>Detected: {detection.class} ({detection.confidence}%)</p>}
      {messages.map((m) => (
        <div key={m.id}>{m.role}: {m.content}</div>
      ))}
    </div>
  );
}
```
```typescript
// app/api/chat/route.ts
import OpenAI from 'openai';
import { OpenAIStream, StreamingTextResponse } from 'ai';

const openai = new OpenAI();

export async function POST(req: Request) {
  const { messages } = await req.json();

  // Your OpenAI/AI provider logic here; a streaming OpenAI call shown as one option
  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    stream: true,
    messages,
  });

  const stream = OpenAIStream(response);
  return new StreamingTextResponse(stream);
}
```
## Use in Next.js/Node.js
### Option 1: FastAPI Backend + Next.js
**Backend (Python):**
```python
from fastapi import FastAPI, File, UploadFile
from ultralytics import YOLO
from PIL import Image
import io

app = FastAPI()
model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

@app.post("/detect")
async def detect(file: UploadFile = File(...)):
    image = Image.open(io.BytesIO(await file.read()))
    results = model.predict(image)
    probs = results[0].probs
    return {
        "class": ['arm', 'hand', 'not_hand'][probs.top1],  # alphabetical order
        "confidence": float(probs.top1conf)
    }
```
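Assuming the backend above is saved as `main.py`, it can be served locally with uvicorn (FastAPI also needs `python-multipart` to parse file uploads):

```bash
pip install fastapi uvicorn python-multipart
uvicorn main:app --reload --port 8000
```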
**Frontend (Next.js):**
```javascript
async function detectHand(imageFile) {
  const formData = new FormData();
  formData.append('file', imageFile);

  const response = await fetch('http://localhost:8000/detect', {
    method: 'POST',
    body: formData
  });
  const result = await response.json();
  console.log(`Detected: ${result.class} (${result.confidence * 100}%)`);
}
```
### Option 2: ONNX for Browser
```python
# Convert to ONNX first
from ultralytics import YOLO

model = YOLO('model.pt')
model.export(format='onnx')
```
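Before wiring the model into a browser, it can help to sanity-check the exported file with the `onnxruntime` Python package. A minimal sketch, assuming the export takes a 1×3×224×224 float32 input scaled to [0, 1] (the usual layout for YOLOv8 classification exports):

```python
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name

# Dummy image batch: 1 x 3 x 224 x 224, values in [0, 1]
x = np.random.rand(1, 3, 224, 224).astype(np.float32)

(scores,) = session.run(None, {input_name: x})
print(scores.shape)          # expected: (1, 3) - one score per class
print(int(scores.argmax()))  # predicted class index
```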
Then use with ONNX Runtime Web:
```javascript
import * as ort from 'onnxruntime-web';

const session = await ort.InferenceSession.create('/model.onnx');
// Process and run inference...
```
## React Native
```javascript
import { Alert } from 'react-native';

const detectHand = async (imageUri) => {
  const formData = new FormData();
  // Field name must match the backend parameter ('file' for the FastAPI example above)
  formData.append('file', {
    uri: imageUri,
    type: 'image/jpeg',
    name: 'photo.jpg'
  });

  const response = await fetch('YOUR_API_URL/detect', {
    method: 'POST',
    body: formData
  });
  const result = await response.json();
  Alert.alert(`Detected: ${result.class}`);
};
```
## cURL Test
```bash
curl -X POST -F "file=@test.jpg" http://localhost:8000/detect
```
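The same request from Python, using the `requests` package (an extra dependency, shown only for convenience):

```python
import requests

# Post a test image to the local FastAPI backend
with open('test.jpg', 'rb') as f:
    r = requests.post('http://localhost:8000/detect', files={'file': f})

print(r.json())  # e.g. {"class": "hand", "confidence": 0.98}
```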
## Model Details
- Architecture: YOLOv8s-cls (5M parameters)
- Classes: 3 (arm=0, hand=1, not_hand=2) - alphabetical order
- Input Size: 224x224
- Accuracy: >96% on validation set
- Size: ~3MB
## Training Data
- Total Images: 1,740
- Distribution:
  - Hand: 704 images (40%)
  - Arm: 320 images (18%)
  - Not Hand: 462 images (27%)
  - Val: 254 images (15%)
## Performance
| Metric | Value |
|---|---|
| Validation Accuracy | 96.3% |
| Inference Speed | 30+ FPS (Apple M1) |
| Model Size | 2.97 MB |
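To reproduce the inference-speed figure on your own hardware, a rough timing loop (throughput varies with device, backend, and image size):

```python
import time
from ultralytics import YOLO

model = YOLO('https://huggingface.co/EtanHey/hand-detection-3class/resolve/main/model.pt')

# Warm-up run so model loading/initialization doesn't skew the measurement
model.predict('image.jpg', verbose=False)

n = 50
start = time.perf_counter()
for _ in range(n):
    model.predict('image.jpg', verbose=False)
elapsed = time.perf_counter() - start
print(f"{n / elapsed:.1f} FPS")
```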
## License
MIT - Free for commercial use
## Citation
If you use this model, please cite:
```bibtex
@software{hand_detection_yolo_2024,
  author = {EtanHey},
  title = {Hand Detection YOLOv8 Model},
  year = {2024},
  publisher = {HuggingFace},
  url = {https://huggingface.co/EtanHey/hand-detection-3class}
}
```