3v324v23 committed on
Commit
7d6b04d
Β·
0 Parent(s):
Files changed (3) hide show
  1. README.md +45 -0
  2. app.py +337 -0
  3. requirements.txt +8 -0
README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: OpenPose Preprocessor
3
+ emoji: 🦴
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: "4.44.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # 🦴 OpenPose Preprocessor for ControlNet
14
+
15
+ A powerful pose detection preprocessor supporting multiple models with high customization options.
16
+
17
+ ## Features
18
+
19
+ - **Multiple Models**: OpenPose (various modes) + DWPose
20
+ - **High Customization**: Toggle hand/face detection, adjust resolution
21
+ - **Multiple Outputs**: Visual skeleton, JSON keypoints, or both
22
+ - **GPU Acceleration**: Auto-detects GPU, falls back to CPU
23
+
24
+ ## Supported Models
25
+
26
+ | Model | Description |
27
+ |-------|-------------|
28
+ | OpenPose | Basic body keypoints |
29
+ | OpenPose (Face) | Body + facial landmarks |
30
+ | OpenPose (Hand) | Body + hand keypoints |
31
+ | OpenPose (Full) | Body + face + hands |
32
+ | OpenPose (Face Only) | Facial landmarks only |
33
+ | DWPose | More accurate pose detection |
34
+
35
+ ## Usage
36
+
37
+ 1. Upload an image
38
+ 2. Select your preferred model
39
+ 3. Adjust detection options (hands, face, resolution)
40
+ 4. Click "Detect Pose" to process
41
+ 5. Download the result or copy JSON keypoints
42
+
43
+ ## For ControlNet Users
44
+
45
+ The output is directly compatible with ControlNet OpenPose models. Simply download the pose image and use it as your ControlNet input.
app.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenPose Preprocessor for ControlNet
3
+ A Gradio application for pose detection with multiple models and customization options.
4
+ """
5
+
6
+ import gradio as gr
7
+ import numpy as np
8
+ from PIL import Image
9
+ import torch
10
+ import json
11
+ from typing import Tuple, Optional, Dict, Any
12
+
13
+ # Global device detection
14
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
+ print(f"Using device: {DEVICE}")
16
+
17
+ # Model cache to avoid reloading
18
+ _model_cache: Dict[str, Any] = {}
19
+
20
+
21
def get_openpose_detector():
    """Return the cached OpenPose detector, loading it on first use."""
    detector = _model_cache.get("openpose")
    if detector is None:
        # Imported lazily so the app starts even before the model is needed.
        from controlnet_aux import OpenposeDetector
        detector = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
        _model_cache["openpose"] = detector
    return detector
27
+
28
+
29
def get_dwpose_detector():
    """Return the cached DWPose detector, instantiating it on first use."""
    detector = _model_cache.get("dwpose")
    if detector is None:
        # Imported lazily so the app starts even before the model is needed.
        from easy_dwpose import DWposeDetector
        detector = DWposeDetector(device=DEVICE)
        _model_cache["dwpose"] = detector
    return detector
35
+
36
+
37
def process_with_openpose(
    image: Image.Image,
    mode: str,
    detect_hand: bool,
    detect_face: bool,
    detect_resolution: int,
) -> Tuple[Image.Image, Optional[dict]]:
    """Process image using the OpenPose detector.

    Args:
        image: Input RGB PIL image.
        mode: One of the "OpenPose*" UI mode strings.
        detect_hand: Hand-detection toggle (honored where the mode allows it).
        detect_face: Face-detection toggle (honored where the mode allows it).
        detect_resolution: Resolution the detector runs at.

    Returns:
        Tuple of (pose image, None) — keypoints are not extracted on this path.
    """
    detector = get_openpose_detector()

    # Map the UI mode plus the checkbox toggles onto the detector's explicit
    # include_* flags.  The previous implementation funneled everything
    # through the combined `hand_and_face` switch, which enables hands AND
    # face together — so "OpenPose (Hand)" also drew the face and
    # "OpenPose (Face)" also drew hands.  The explicit flags (already used
    # here for the face-only mode) keep the two independent.
    if mode == "OpenPose (Full)":
        include_body, include_hand, include_face = True, True, True
    elif mode == "OpenPose (Hand)":
        include_body, include_hand, include_face = True, detect_hand, False
    elif mode == "OpenPose (Face)":
        include_body, include_hand, include_face = True, False, detect_face
    elif mode == "OpenPose (Face Only)":
        include_body, include_hand, include_face = False, False, True
    else:
        # Plain "OpenPose": honor each checkbox independently.
        include_body, include_hand, include_face = True, detect_hand, detect_face

    result = detector(
        image,
        detect_resolution=detect_resolution,
        include_body=include_body,
        include_hand=include_hand,
        include_face=include_face,
        output_type="pil"
    )

    return result, None
77
+
78
+
79
def process_with_dwpose(
    image: Image.Image,
    detect_hand: bool,
    detect_face: bool,
    detect_resolution: int,
) -> Tuple[Image.Image, Optional[dict]]:
    """Process image using the DWPose detector.

    Args:
        image: Input RGB PIL image.
        detect_hand: Whether to include hand keypoints.
        detect_face: Whether to include face keypoints.
        detect_resolution: Longest-side size the detector is fed.

    Returns:
        Tuple of (pose image resized back to the input size, None) —
        keypoints are not extracted on this path.
    """
    detector = get_dwpose_detector()

    # The detector is fed a pre-scaled copy: longest side == detect_resolution,
    # aspect ratio preserved.
    orig_w, orig_h = image.size
    scale = detect_resolution / max(orig_w, orig_h)
    # Clamp to >= 1 px so extreme aspect ratios cannot yield a zero dimension,
    # which would make Image.resize raise.
    new_w = max(1, int(orig_w * scale))
    new_h = max(1, int(orig_h * scale))
    resized_image = image.resize((new_w, new_h), Image.Resampling.LANCZOS)

    result = detector(
        resized_image,
        output_type="pil",
        include_hands=detect_hand,
        include_face=detect_face
    )

    # Map the skeleton back onto the original canvas size.
    result = result.resize((orig_w, orig_h), Image.Resampling.LANCZOS)

    return result, None
105
+
106
+
107
def detect_pose(
    image: Image.Image,
    model_type: str,
    detect_hand: bool,
    detect_face: bool,
    detect_resolution: int,
    output_resolution: int,
    output_format: str,
) -> Tuple[Optional[Image.Image], str]:
    """
    Main pose detection function.

    Args:
        image: Input PIL Image (any mode; converted to RGB internally)
        model_type: Selected model type ("DWPose" or an "OpenPose*" mode)
        detect_hand: Whether to detect hands
        detect_face: Whether to detect face
        detect_resolution: Resolution for detection
        output_resolution: Longest-side resolution for the output image
        output_format: "Image", "JSON", or "Both"

    Returns:
        Tuple of (output_image, json_string_or_status_message)
    """
    if image is None:
        return None, "Please upload an image first."

    try:
        # Detectors expect 3-channel input; normalize RGBA/palette/gray images.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Dispatch to the selected backend; keypoints are unused for now.
        if model_type == "DWPose":
            result_image, _keypoints = process_with_dwpose(
                image, detect_hand, detect_face, detect_resolution
            )
        else:
            result_image, _keypoints = process_with_openpose(
                image, model_type, detect_hand, detect_face, detect_resolution
            )

        # Rescale so the longest side matches output_resolution.  Skip the
        # resample when the size is already right (a same-size LANCZOS pass
        # is wasted work) and clamp dims to >= 1 px so extreme aspect ratios
        # cannot produce a zero dimension.
        if output_resolution > 0:
            orig_w, orig_h = result_image.size
            scale = output_resolution / max(orig_w, orig_h)
            new_w = max(1, int(orig_w * scale))
            new_h = max(1, int(orig_h * scale))
            if (new_w, new_h) != (orig_w, orig_h):
                result_image = result_image.resize(
                    (new_w, new_h), Image.Resampling.LANCZOS
                )

        # Build the JSON side of the output when requested.
        json_output = ""
        if output_format in ("JSON", "Both"):
            json_output = json.dumps({
                "model": model_type,
                "detect_hand": detect_hand,
                "detect_face": detect_face,
                "detect_resolution": detect_resolution,
                "output_resolution": output_resolution,
                "device": DEVICE,
                "status": "success",
                "note": "Keypoint extraction requires additional processing. Use the output image for ControlNet."
            }, indent=2, ensure_ascii=False)

        if output_format == "JSON":
            return None, json_output
        elif output_format == "Image":
            return result_image, "Processing complete. Image ready for ControlNet."
        else:  # Both
            return result_image, json_output

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        error_msg = f"Error during processing: {str(e)}"
        return None, error_msg
180
+
181
+
182
def create_ui() -> gr.Blocks:
    """Create the Gradio UI and wire its events to detect_pose."""

    css = """
    .main-title {
        text-align: center;
        margin-bottom: 1rem;
    }
    .settings-panel {
        background: var(--background-fill-secondary);
        padding: 1rem;
        border-radius: 8px;
    }
    """

    with gr.Blocks(
        title="🦴 OpenPose Preprocessor",
        css=css,
        theme=gr.themes.Soft()
    ) as demo:

        # Header
        gr.Markdown(
            """
            # 🦴 OpenPose Preprocessor for ControlNet

            High-quality pose detection with multiple models and customization options.
            Upload an image and get pose skeleton for ControlNet.
            """
        )

        # Device info
        gr.Markdown(f"**Device**: `{DEVICE}` {'🚀' if DEVICE == 'cuda' else '🐢'}")

        with gr.Row():
            # Left column - Input
            with gr.Column(scale=1):
                input_image = gr.Image(
                    label="📷 Input Image",
                    type="pil",
                    height=400
                )

                # Settings
                with gr.Accordion("⚙️ Settings", open=True):
                    model_type = gr.Dropdown(
                        label="🤖 Model",
                        choices=[
                            "DWPose",
                            "OpenPose",
                            "OpenPose (Face)",
                            "OpenPose (Hand)",
                            "OpenPose (Full)",
                            "OpenPose (Face Only)"
                        ],
                        value="DWPose",
                        info="DWPose is recommended for better accuracy"
                    )

                    with gr.Row():
                        detect_hand = gr.Checkbox(
                            label="👆 Detect Hands",
                            value=True
                        )
                        detect_face = gr.Checkbox(
                            label="😊 Detect Face",
                            value=True
                        )

                    detect_resolution = gr.Slider(
                        label="📏 Detection Resolution",
                        minimum=256,
                        maximum=2048,
                        value=512,
                        step=64,
                        info="Higher = more accurate but slower"
                    )

                    output_resolution = gr.Slider(
                        label="🖼️ Output Resolution",
                        minimum=256,
                        maximum=2048,
                        value=512,
                        step=64,
                        info="Final output image resolution"
                    )

                    output_format = gr.Radio(
                        label="📊 Output Format",
                        choices=["Image", "JSON", "Both"],
                        value="Both"
                    )

                # Process button
                process_btn = gr.Button(
                    "🚀 Detect Pose",
                    variant="primary",
                    size="lg"
                )

            # Right column - Output
            with gr.Column(scale=1):
                output_image = gr.Image(
                    label="🎨 Output Pose",
                    type="pil",
                    height=400
                )

                output_json = gr.Textbox(
                    label="📋 Output Info",
                    lines=8,
                    max_lines=15
                )

        # Usage tips
        gr.Markdown("### 📌 Tips")
        gr.Markdown(
            """
            - **DWPose** is recommended for best accuracy, especially for hands
            - **OpenPose (Full)** detects body, face, and hands together
            - Higher **Detection Resolution** improves accuracy but increases processing time
            - The output image can be directly used with ControlNet OpenPose models
            """
        )

        # Connect events
        process_btn.click(
            fn=detect_pose,
            inputs=[
                input_image,
                model_type,
                detect_hand,
                detect_face,
                detect_resolution,
                output_resolution,
                output_format,
            ],
            outputs=[output_image, output_json]
        )

        # Clear stale outputs whenever a new image is uploaded.
        # NOTE: a gr.Image output must be cleared with None — the previous
        # code returned "" for it, which is not a valid image value.
        input_image.change(
            fn=lambda: (None, ""),
            outputs=[output_image, output_json]
        )

    return demo
329
+
330
+
331
+ if __name__ == "__main__":
332
+ demo = create_ui()
333
+ demo.launch(
334
+ server_name="0.0.0.0",
335
+ server_port=7860,
336
+ share=False
337
+ )
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ controlnet-aux>=0.0.9
3
+ easy-dwpose
4
+ torch
5
+ torchvision
6
+ Pillow
7
+ numpy
8
+ opencv-python-headless