EtanHey committed on
Commit
777c63c
·
1 Parent(s): 10b39cb

Deploy hand detection API with Gradio interface and FastAPI endpoints

Browse files
Files changed (2) hide show
  1. app.py +291 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HuggingFace Spaces App for Hand/Arm Detection
3
+ Provides both Gradio UI and API endpoints
4
+ Model: https://huggingface.co/EtanHey/hand-sign-detection
5
+ """
6
+
7
+ import gradio as gr
8
+ from ultralytics import YOLO
9
+ import numpy as np
10
+ from PIL import Image
11
+ import json
12
+ import base64
13
+ from io import BytesIO
14
+ from typing import Dict, Tuple, Any
15
+ import logging
16
+ from fastapi import FastAPI, File, UploadFile, HTTPException
17
+ from fastapi.responses import JSONResponse
18
+ import uvicorn
19
+ from threading import Thread
20
+
21
# Configure root logging once at import time so library and app messages
# are visible in the Spaces container logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app for API endpoints
app = FastAPI(title="Hand Detection API")

# Load the model
# MODEL_PATH is a remote HuggingFace Hub URL; YOLO() downloads the weights
# on first use, so startup requires network access.
MODEL_PATH = "https://huggingface.co/EtanHey/hand-sign-detection/resolve/main/model.pt"
# Populated by load_model(); stays None if the download/initialization fails,
# and process_image() checks for that before inference.
model = None
30
+
31
def load_model():
    """Download and initialize the YOLO model from HuggingFace.

    Sets the module-level ``model`` global and returns True on success,
    False if anything goes wrong (the app then serves structured errors
    instead of crashing at import time).
    """
    global model
    logger.info(f"Loading model from {MODEL_PATH}")
    try:
        model = YOLO(MODEL_PATH)
    except Exception as e:
        # Keep the app importable even when the weights can't be fetched;
        # callers detect the failure via the None model / False return.
        logger.error(f"❌ Failed to load model: {e}")
        return False
    logger.info("✅ Model loaded successfully!")
    return True
42
+
43
+ # Load model on startup
44
# Load model on startup (import time) so the first request doesn't pay the
# download/initialization cost; failure leaves `model` as None.
load_model()

# Class names (alphabetical order as YOLO expects)
CLASS_NAMES = ['arm', 'hand', 'not_hand']
# Human-readable labels shown in the UI, keyed by class name.
CLASS_LABELS = {
    'arm': '💪 Arm',
    'hand': '✋ Hand',
    'not_hand': '❌ Not Hand/Arm'
}
53
+
54
def process_image(image: Image.Image) -> Dict[str, Any]:
    """Run classification on a PIL image and return a JSON-serializable result.

    Returns a dict with keys:
        class:          top predicted class name ('arm' | 'hand' | 'not_hand')
        confidence:     top-1 probability as a float
        probabilities:  per-class probabilities keyed by class name
        label:          human-readable label (present only on success)
        error:          present only when something went wrong
    """
    if model is None:
        # Model failed to load at startup; return a structured error instead
        # of raising so API callers always receive a JSON body.
        return {
            "error": "Model not loaded",
            "class": "unknown",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }

    try:
        # YOLO expects 3-channel RGB input (uploads may be RGBA/grayscale).
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Run inference
        results = model.predict(image, verbose=False)

        # BUGFIX: use an explicit `is None` check. `probs` wraps a
        # multi-element tensor, and truth-testing such a tensor raises
        # "Boolean value of Tensor with more than one element is ambiguous",
        # which previously sent every successful prediction into the
        # exception handler below.
        if not results or results[0].probs is None:
            return {
                "class": "not_hand",
                "confidence": 0.0,
                "probabilities": {"hand": 0, "arm": 0, "not_hand": 1.0}
            }

        # Extract probabilities
        probs = results[0].probs
        top_class_idx = int(probs.top1)
        top_confidence = float(probs.top1conf)

        # Build the probability dict from CLASS_NAMES so the index->name
        # mapping lives in one place (YOLO orders classes alphabetically,
        # matching CLASS_NAMES).
        probabilities = {
            name: float(probs.data[i]) for i, name in enumerate(CLASS_NAMES)
        }

        return {
            "class": CLASS_NAMES[top_class_idx],
            "confidence": top_confidence,
            "probabilities": probabilities,
            "label": CLASS_LABELS[CLASS_NAMES[top_class_idx]]
        }

    except Exception as e:
        # Broad catch at the service boundary: always return JSON to callers.
        logger.error(f"Error processing image: {e}")
        return {
            "error": str(e),
            "class": "error",
            "confidence": 0.0,
            "probabilities": {"hand": 0, "arm": 0, "not_hand": 0}
        }
106
+
107
def gradio_predict(image: Image.Image) -> Tuple[str, Dict, str]:
    """Prediction callback for the Gradio UI.

    Returns a markdown summary, a label->probability mapping for the
    confidence-bar component, and the raw JSON payload for developers.
    """
    if image is None:
        return "Please upload an image", {}, ""

    # Delegate to the shared inference helper used by the API endpoints.
    result = process_image(image)

    if "error" in result:
        return f"Error: {result['error']}", {}, ""

    probs = result["probabilities"]

    # Scores feeding the gr.Label confidence bars.
    confidence_scores = {
        "✋ Hand": probs["hand"],
        "💪 Arm": probs["arm"],
        "❌ Neither": probs["not_hand"]
    }

    main_label = result["label"]
    confidence = result["confidence"]

    output_text = f"""
## Detection Result

**Detected:** {main_label}
**Confidence:** {confidence:.1%}

### Detailed Probabilities:
- Hand: {probs['hand']:.1%}
- Arm: {probs['arm']:.1%}
- Not Hand/Arm: {probs['not_hand']:.1%}

### Understanding the Classes:
- **Hand**: Close-up view with fingers visible
- **Arm**: Forearm or elbow area without fingers
- **Not Hand/Arm**: Neither hand nor arm detected
"""

    # Raw JSON for developers inspecting the API contract.
    return output_text, confidence_scores, json.dumps(result, indent=2)
151
+
152
+ # FastAPI endpoints for API access
153
+ @app.get("/")
154
+ async def root():
155
+ """Health check endpoint"""
156
+ return {
157
+ "status": "online",
158
+ "model": "hand-sign-detection",
159
+ "classes": CLASS_NAMES,
160
+ "api_endpoints": {
161
+ "health": "/",
162
+ "predict": "/api/predict",
163
+ "predict_base64": "/api/predict/base64"
164
+ }
165
+ }
166
+
167
+ @app.post("/api/predict")
168
+ async def predict_api(file: UploadFile = File(...)):
169
+ """API endpoint for file upload prediction"""
170
+ try:
171
+ # Read image
172
+ contents = await file.read()
173
+ image = Image.open(BytesIO(contents))
174
+
175
+ # Process
176
+ result = process_image(image)
177
+
178
+ return JSONResponse(content=result)
179
+
180
+ except Exception as e:
181
+ raise HTTPException(status_code=400, detail=str(e))
182
+
183
+ @app.post("/api/predict/base64")
184
+ async def predict_base64_api(data: Dict[str, str]):
185
+ """API endpoint for base64 image prediction"""
186
+ try:
187
+ # Decode base64 image
188
+ image_data = base64.b64decode(data["image"])
189
+ image = Image.open(BytesIO(image_data))
190
+
191
+ # Process
192
+ result = process_image(image)
193
+
194
+ return JSONResponse(content=result)
195
+
196
+ except Exception as e:
197
+ raise HTTPException(status_code=400, detail=str(e))
198
+
199
+ # Gradio Interface
200
def create_gradio_interface():
    """Create and return the Gradio interface for the detection model."""
    # Local import: only needed here, keeps the top-of-file imports unchanged.
    import os

    # Custom CSS for better styling
    custom_css = """
    .gradio-container {
        font-family: 'Inter', sans-serif;
    }
    .output-class {
        font-size: 24px;
        font-weight: bold;
    }
    """

    # BUGFIX: only offer example images that actually exist on disk. This
    # commit ships no examples/ directory, so passing the hard-coded paths
    # with cache_examples=True would crash at startup while Gradio tries to
    # pre-compute outputs for missing files. (The old `examples if examples
    # else None` was also always truthy for a non-empty literal list.)
    candidate_examples = [
        "examples/hand_example.jpg",
        "examples/arm_example.jpg",
        "examples/face_example.jpg"
    ]
    examples = [[p] for p in candidate_examples if os.path.exists(p)]

    # Create interface
    interface = gr.Interface(
        fn=gradio_predict,
        inputs=[
            gr.Image(
                type="pil",
                label="Upload Image",
                sources=["upload", "webcam", "clipboard"]
            )
        ],
        outputs=[
            gr.Markdown(label="Detection Result"),
            gr.Label(label="Confidence Scores", num_top_classes=3),
            gr.JSON(label="API Response (for developers)")
        ],
        title="🤚 Hand/Arm Detection AI",
        description="""
    Upload an image or use your webcam to detect hands and arms.

    **Model:** YOLOv8 trained on 1,740 images | **Accuracy:** 96.3%

    **API Access:** Use the `/api/predict` endpoint for programmatic access.
    """,
        article="""
    ### About
    This model distinguishes between:
    - **Hands**: Close-up views with visible fingers
    - **Arms**: Forearm/elbow areas without fingers
    - **Neither**: Images without hands or arms

    ### API Usage
    ```python
    import requests

    # Upload file
    response = requests.post(
        "https://huggingface.co/spaces/EtanHey/hand-detection/api/predict",
        files={"file": open("image.jpg", "rb")}
    )
    print(response.json())
    ```

    ### Model Card
    View the full model details at [HuggingFace Model Hub](https://huggingface.co/EtanHey/hand-sign-detection)
    """,
        examples=examples or None,
        # Only pre-compute cached outputs when there are real example files.
        cache_examples=bool(examples),
        css=custom_css,
        theme=gr.themes.Soft()
    )

    return interface
273
+
274
+ # Run FastAPI in background thread
275
def run_api():
    """Run the FastAPI server in a background thread on an internal port.

    NOTE(review): HF Spaces exposes only port 7860 externally. The original
    code bound FastAPI to 7860 and the Gradio UI to 7861, which left the UI
    unreachable on Spaces. FastAPI now runs on an internal port (8000) while
    Gradio owns the exposed 7860 — confirm the Space's expected port config.
    """
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Start the API server in the background as a daemon thread so it exits
# with the main process instead of blocking interpreter shutdown.
api_thread = Thread(target=run_api, daemon=True)
api_thread.start()

# Create and launch the Gradio interface on the externally exposed port.
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(
        server_name="0.0.0.0",  # listen on all interfaces inside the container
        server_port=7860,       # the only port HF Spaces exposes
        share=False,
        debug=True
    )
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ ultralytics>=8.3.0
3
+ Pillow>=10.0.0
4
+ numpy>=1.24.0
5
+ torch>=2.0.0
6
+ fastapi>=0.104.0
7
+ uvicorn>=0.24.0
8
+ python-multipart>=0.0.6
9
+ opencv-python-headless>=4.8.0