aiqtech committed on
Commit
4fea6f9
·
verified ·
1 Parent(s): 63ddbf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +336 -436
app.py CHANGED
@@ -1,15 +1,12 @@
1
  import gradio as gr
2
  import numpy as np
3
- import cv2
4
- from fastapi import FastAPI, Request, Response
5
- from src.body import Body
6
- import json as js
7
  import requests
8
  import os
9
  from typing import Dict, List, Tuple
10
-
11
- # Initialize body estimation model
12
- body_estimation = Body('model/body_pose_model.pth')
13
 
14
  # Fireworks AI configuration
15
  FIREWORKS_API_KEY = os.getenv("FIREWORKS_API_KEY", "YOUR_API_KEY_HERE")
@@ -23,69 +20,100 @@ BODY_PARTS = {
23
  "LEye": 15, "REar": 16, "LEar": 17
24
  }
25
 
26
- # Pose templates for common positions
 
 
 
 
 
 
 
 
 
 
 
27
  POSE_TEMPLATES = {
28
- "standing": {
29
- "keypoints": {
30
- "Neck": [256, 120],
31
- "RShoulder": [220, 140], "RElbow": [200, 200], "RWrist": [190, 260],
32
- "LShoulder": [292, 140], "LElbow": [312, 200], "LWrist": [322, 260],
33
- "RHip": [230, 280], "RKnee": [225, 380], "RAnkle": [220, 480],
34
- "LHip": [282, 280], "LKnee": [287, 380], "LAnkle": [292, 480]
35
- }
 
 
 
 
 
 
 
36
  },
37
- "sitting": {
38
- "keypoints": {
39
- "Neck": [256, 180],
40
- "RShoulder": [220, 200], "RElbow": [200, 260], "RWrist": [190, 320],
41
- "LShoulder": [292, 200], "LElbow": [312, 260], "LWrist": [322, 320],
42
- "RHip": [230, 340], "RKnee": [225, 400], "RAnkle": [280, 420],
43
- "LHip": [282, 340], "LKnee": [287, 400], "LAnkle": [232, 420]
44
- }
45
  },
46
- "running": {
47
- "keypoints": {
48
- "Neck": [256, 120],
49
- "RShoulder": [220, 140], "RElbow": [180, 180], "RWrist": [150, 220],
50
- "LShoulder": [292, 140], "LElbow": [332, 180], "LWrist": [362, 140],
51
- "RHip": [230, 280], "RKnee": [260, 380], "RAnkle": [290, 470],
52
- "LHip": [282, 280], "LKnee": [252, 360], "LAnkle": [222, 440]
53
- }
54
  },
55
- "yoga": {
56
- "keypoints": {
57
- "Neck": [256, 140],
58
- "RShoulder": [200, 160], "RElbow": [150, 120], "RWrist": [100, 100],
59
- "LShoulder": [312, 160], "LElbow": [362, 120], "LWrist": [412, 100],
60
- "RHip": [240, 300], "RKnee": [220, 400], "RAnkle": [200, 480],
61
- "LHip": [272, 300], "LKnee": [292, 400], "LAnkle": [312, 480]
62
- }
63
  }
64
  }
65
 
66
- def pil2cv(image):
67
- '''PIL型 -> OpenCV型'''
68
- new_image = np.array(image, dtype=np.uint8)
69
- if new_image.ndim == 2: # モノクロ
70
- pass
71
- elif new_image.shape[2] == 3: # カラー
72
- new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
73
- elif new_image.shape[2] == 4: # 透過
74
- new_image = cv2.cvtColor(new_image, cv2.COLOR_RGBA2BGRA)
75
- return new_image
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- def generate_pose_from_llm_sync(prompt: str) -> Dict:
78
  """
79
- LLM을 사용하여 텍스트 프롬프트로부터 포즈 데이터를 생성
80
  """
81
- system_prompt = """You are an expert in human pose generation. Generate precise OpenPose keypoint coordinates.
 
 
 
 
 
82
 
83
- Canvas: 512x512 pixels
84
- Return 18 keypoints with format: [[x, y, confidence], ...]
85
- Keypoints: 0:Nose, 1:Neck, 2:RShoulder, 3:RElbow, 4:RWrist, 5:LShoulder, 6:LElbow, 7:LWrist,
86
- 8:RHip, 9:RKnee, 10:RAnkle, 11:LHip, 12:LKnee, 13:LAnkle, 14:REye, 15:LEye, 16:REar, 17:LEar
87
 
88
- Return JSON: {"candidate": [[x,y,1.0],...], "subset": [[connected_indices, score, count]]}"""
89
 
90
  headers = {
91
  "Accept": "application/json",
@@ -95,11 +123,11 @@ def generate_pose_from_llm_sync(prompt: str) -> Dict:
95
 
96
  payload = {
97
  "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
98
- "max_tokens": 2048,
99
  "temperature": 0.3,
100
  "messages": [
101
  {"role": "system", "content": system_prompt},
102
- {"role": "user", "content": f"Generate OpenPose keypoints for: {prompt}"}
103
  ]
104
  }
105
 
@@ -109,70 +137,63 @@ def generate_pose_from_llm_sync(prompt: str) -> Dict:
109
  data = response.json()
110
  content = data['choices'][0]['message']['content']
111
 
 
112
  import re
113
  json_match = re.search(r'\{.*\}', content, re.DOTALL)
114
  if json_match:
115
- pose_data = js.loads(json_match.group())
116
- return pose_data
117
  except Exception as e:
118
  print(f"LLM Error: {e}")
119
 
120
- return generate_template_pose(prompt)
 
121
 
122
- def generate_template_pose(prompt: str) -> Dict:
123
  """
124
- 템플릿 기반 포즈 생성
125
  """
126
  prompt_lower = prompt.lower()
127
 
128
- # Select template based on keywords
129
- if any(word in prompt_lower for word in ["sit", "sitting", "seated", "chair"]):
130
- template = POSE_TEMPLATES["sitting"]
131
- elif any(word in prompt_lower for word in ["run", "running", "jog", "sprint"]):
132
- template = POSE_TEMPLATES["running"]
133
- elif any(word in prompt_lower for word in ["yoga", "warrior", "meditation", "stretch"]):
134
- template = POSE_TEMPLATES["yoga"]
 
135
  else:
136
- template = POSE_TEMPLATES["standing"]
137
-
138
- # Build candidate array
139
- candidate = []
140
- for i in range(18):
141
- if i == 0: # Nose
142
- candidate.append([256, 100, 1.0])
143
- elif i == 14: # REye
144
- candidate.append([246, 90, 1.0])
145
- elif i == 15: # LEye
146
- candidate.append([266, 90, 1.0])
147
- elif i == 16: # REar
148
- candidate.append([236, 95, 1.0])
149
- elif i == 17: # LEar
150
- candidate.append([276, 95, 1.0])
151
- else:
152
- part_name = None
153
- for name, idx in BODY_PARTS.items():
154
- if idx == i:
155
- part_name = name
156
- break
157
-
158
- if part_name and part_name in template["keypoints"]:
159
- x, y = template["keypoints"][part_name]
160
- candidate.append([x, y, 1.0])
161
- else:
162
- candidate.append([256, 256, 0.0])
163
-
164
- # Create subset
165
- valid_indices = [i for i in range(18) if candidate[i][2] > 0]
166
- subset = [valid_indices + [float(len(valid_indices)), len(valid_indices)]]
167
-
168
- return {"candidate": candidate, "subset": subset}
169
 
170
- def refine_pose_with_llm(current_pose: Dict, refinement_prompt: str) -> Dict:
171
  """
172
- 기존 포즈를 LLM으로 세밀하게 조정
173
  """
174
- system_prompt = """Modify the given pose keypoints based on instructions.
175
- Maintain anatomical correctness. Return same JSON format."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  headers = {
178
  "Accept": "application/json",
@@ -182,11 +203,11 @@ def refine_pose_with_llm(current_pose: Dict, refinement_prompt: str) -> Dict:
182
 
183
  payload = {
184
  "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
185
- "max_tokens": 2048,
186
  "temperature": 0.2,
187
  "messages": [
188
  {"role": "system", "content": system_prompt},
189
- {"role": "user", "content": f"Current: {js.dumps(current_pose)}\nAdjust: {refinement_prompt}"}
190
  ]
191
  }
192
 
@@ -199,363 +220,242 @@ def refine_pose_with_llm(current_pose: Dict, refinement_prompt: str) -> Dict:
199
  import re
200
  json_match = re.search(r'\{.*\}', content, re.DOTALL)
201
  if json_match:
202
- return js.loads(json_match.group())
203
  except Exception as e:
204
  print(f"Refinement error: {e}")
205
 
206
- return current_pose
207
-
208
- # Load JavaScript file
209
- try:
210
- with open("static/poseEditor.js", "r") as f:
211
- file_contents = f.read()
212
- except:
213
- file_contents = "console.log('PoseEditor.js not found');"
214
-
215
- app = FastAPI()
216
-
217
- @app.middleware("http")
218
- async def some_fastapi_middleware(request: Request, call_next):
219
- path = request.scope['path']
220
- response = await call_next(request)
221
-
222
- if path == "/":
223
- response_body = ""
224
- async for chunk in response.body_iterator:
225
- response_body += chunk.decode()
226
-
227
- some_javascript = f"""
228
- <script type="text/javascript" defer>
229
- {file_contents}
230
- </script>
231
- """
232
 
233
- response_body = response_body.replace("</body>", some_javascript + "</body>")
234
- del response.headers["content-length"]
235
-
236
- return Response(
237
- content=response_body,
238
- status_code=response.status_code,
239
- headers=dict(response.headers),
240
- media_type=response.media_type
241
- )
242
-
243
- return response
244
-
245
- def candidate_to_json_string(arr):
246
- if isinstance(arr, list):
247
- a = []
248
- for item in arr:
249
- if len(item) >= 2:
250
- x, y = item[0], item[1]
251
- a.append(f'[{float(x):.2f}, {float(y):.2f}]')
252
- return '[' + ', '.join(a) + ']'
253
- return '[]'
254
-
255
- def subset_to_json_string(arr):
256
- if isinstance(arr, np.ndarray):
257
- arr_str = ','.join(['[' + ','.join([f'{num:.2f}' for num in row]) + ']' for row in arr])
258
- return '[' + arr_str + ']'
259
- elif isinstance(arr, list):
260
- arr_str = ','.join(['[' + ','.join([f'{float(num):.2f}' for num in row]) + ']' for row in arr])
261
- return '[' + arr_str + ']'
262
- return '[]'
263
-
264
- def estimate_body(source):
265
- if source == None:
266
- return None
267
-
268
- candidate, subset = body_estimation(pil2cv(source))
269
- return "{ \"candidate\": " + candidate_to_json_string(candidate) + ", \"subset\": " + subset_to_json_string(subset) + " }"
270
-
271
- def image_changed(image):
272
- if image == None:
273
- return "estimation", {}
274
-
275
- if 'openpose' in image.info:
276
- print("pose found")
277
- jsonText = image.info['openpose']
278
- jsonObj = js.loads(jsonText)
279
- subset = jsonObj['subset']
280
- return f"""{image.width}px x {image.height}px, {len(subset)} individual(s)""", jsonText
281
- else:
282
- print("pose not found")
283
- candidate, subset = body_estimation(pil2cv(image))
284
- jsonText = "{ \"candidate\": " + candidate_to_json_string(candidate) + ", \"subset\": " + subset_to_json_string(subset) + " }"
285
- return f"""{image.width}px x {image.height}px, {subset.shape[0]} individual(s)""", jsonText
286
-
287
- def generate_pose_from_text(prompt: str, use_llm: bool = True):
288
  """
289
- 텍스트 프롬프트로부터 포즈 생성
290
  """
291
- if use_llm and FIREWORKS_API_KEY != "YOUR_API_KEY_HERE":
292
- pose_data = generate_pose_from_llm_sync(prompt)
293
- else:
294
- pose_data = generate_template_pose(prompt)
295
-
296
- candidate_str = candidate_to_json_string(pose_data['candidate'])
297
- subset_str = subset_to_json_string(pose_data['subset'])
298
-
299
- return "{ \"candidate\": " + candidate_str + ", \"subset\": " + subset_str + " }"
300
-
301
- # HTML with embedded JavaScript for canvas
302
- html_text = f"""
303
- <div style="position: relative; width: 100%; height: 512px;">
304
- <canvas id="canvas" width="512" height="512" style="border: 1px solid #ccc;"></canvas>
305
- </div>
306
- <script type="text/javascript">
307
- // Canvas initialization
308
- {file_contents}
309
-
310
- // Helper functions for Gradio integration
311
- window.updateCanvasSize = function(width, height) {{
312
- const canvas = document.getElementById('canvas');
313
- if (canvas) {{
314
- if (width) canvas.width = width;
315
- if (height) canvas.height = height;
316
- if (typeof resizeCanvas !== 'undefined') {{
317
- resizeCanvas(width, height);
318
- }}
319
- }}
320
- }};
321
 
322
- window.loadPoseData = function(jsonStr) {{
323
- if (typeof importPose !== 'undefined' && jsonStr) {{
324
- try {{
325
- const poseData = JSON.parse(jsonStr);
326
- importPose(poseData);
327
- }} catch(e) {{
328
- console.error('Error loading pose:', e);
329
- }}
330
- }}
331
- }};
332
 
333
- // Initialize on load
334
- document.addEventListener('DOMContentLoaded', function() {{
335
- if (typeof initializeEditor !== 'undefined') {{
336
- initializeEditor();
337
- }}
338
- }});
339
- </script>
340
- """
341
 
342
- # Gradio interface
343
- with gr.Blocks(css="""
344
- button { min-width: 80px; }
345
- .prompt-box { border: 2px solid #667eea; border-radius: 8px; padding: 10px; }
346
- .llm-status { color: #667eea; font-weight: bold; }
347
- #canvas { border: 1px solid #ddd; }
348
- """) as demo:
349
 
350
  gr.Markdown("""
351
- # 🎨 AI-Powered Pose Generator with LLM
352
- ### Generate precise line art poses from text descriptions
353
  """)
354
 
355
- with gr.Row():
356
- with gr.Column(scale=1):
357
- width = gr.Slider(label="Width", minimum=512, maximum=1024, step=64, value=512, interactive=True)
358
- height = gr.Slider(label="Height", minimum=512, maximum=1024, step=64, value=512, interactive=True)
359
-
360
- # LLM Pose Generation
361
- with gr.Accordion(label="🤖 AI Pose Generation", open=True):
362
- prompt_input = gr.Textbox(
363
- label="Describe the pose",
364
- placeholder="e.g., 'A person sitting cross-legged' or 'Someone running'",
365
- lines=3
366
- )
367
-
368
- with gr.Row():
369
- use_llm_checkbox = gr.Checkbox(label="Use Advanced LLM", value=True)
370
- llm_status = gr.Markdown("")
371
-
372
- with gr.Row():
373
- generate_btn = gr.Button("🎯 Generate Pose", variant="primary")
374
- refine_btn = gr.Button("✨ Refine Current", variant="secondary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
 
376
- refinement_prompt = gr.Textbox(
377
- label="Refinement instructions",
378
- placeholder="e.g., 'Raise the left arm higher'",
379
- lines=2,
380
- visible=False
381
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
- gr.Examples(
384
- examples=[
385
- "A person standing with arms raised in victory",
386
- "Someone sitting at a desk typing",
387
- "A dancer in arabesque position",
388
- "A person doing yoga warrior pose",
389
- "Someone crouching in ready position",
390
- "A person walking casually"
391
- ],
392
- inputs=prompt_input
393
- )
394
 
395
- # Image Estimation
396
- with gr.Accordion(label="📸 Pose from Image", open=False):
397
- source = gr.Image(type="pil")
398
- estimationResult = gr.Markdown("estimation")
399
- with gr.Row():
400
- applySizeBtn = gr.Button("Apply size")
401
- replaceBtn = gr.Button("Replace")
402
- importBtn = gr.Button("Import")
403
 
404
- # JSON Data
405
- with gr.Accordion(label="📋 Json Data", open=False):
406
- with gr.Row():
407
- replaceWithJsonBtn = gr.Button("Replace")
408
- importJsonBtn = gr.Button("Import")
409
- json = gr.JSON(label="Json")
410
- jsonSource = gr.Textbox(label="Json source", lines=10)
411
 
412
- # Help
413
- with gr.Accordion(label="📝 Help", open=False):
414
- gr.Markdown("""
415
- **Controls:**
416
- - Ctrl+Drag: Scale
417
- - Alt+Drag: Move
418
- - Shift+Drag: Rotate
419
- - Ctrl+Z: Undo
420
- - D+Click: Delete person
421
- - Q+Click: Cut limb
422
- """)
423
-
424
- with gr.Column(scale=2):
425
- html = gr.HTML(html_text)
426
- with gr.Row():
427
- saveBtn = gr.Button("💾 Save", size="sm")
428
- generation_status = gr.Markdown("Ready to generate poses...")
429
 
430
- # Hidden components for JS communication
431
- canvas_update = gr.Textbox(visible=False)
432
- pose_data = gr.Textbox(visible=False)
433
-
434
- # Event handlers - without _js parameter
435
- def update_canvas_size(w, h):
436
- return f"{{\"width\": {w}, \"height\": {h}}}"
437
-
438
- width.change(
439
- fn=lambda w: update_canvas_size(w, None),
440
- inputs=[width],
441
- outputs=[canvas_update]
442
- )
443
-
444
- height.change(
445
- fn=lambda h: update_canvas_size(None, h),
446
- inputs=[height],
447
- outputs=[canvas_update]
448
- )
449
-
450
- source.change(
451
- fn=image_changed,
452
- inputs=[source],
453
- outputs=[estimationResult, json]
454
- )
455
-
456
- applySizeBtn.click(
457
- fn=lambda x: (x.width, x.height) if x else (512, 512),
458
- inputs=[source],
459
- outputs=[width, height]
460
- )
461
-
462
- def apply_pose_to_canvas(json_data):
463
- return js.dumps(json_data) if json_data else "{}"
464
-
465
- replaceBtn.click(
466
- fn=apply_pose_to_canvas,
467
- inputs=[json],
468
- outputs=[pose_data]
469
- )
470
-
471
- importBtn.click(
472
- fn=apply_pose_to_canvas,
473
- inputs=[json],
474
- outputs=[pose_data]
475
- )
476
-
477
- # LLM generation
478
- def handle_generate(prompt, use_llm):
479
- if not prompt:
480
- return None, "⚠️ Please enter a pose description", None
481
 
482
- try:
483
- status = "🔄 Generating pose with AI..." if use_llm else "🔄 Using template..."
484
- pose_json = generate_pose_from_text(prompt, use_llm)
485
- pose_dict = js.loads(pose_json)
486
- return pose_dict, "✅ Pose generated successfully!", pose_json
487
- except Exception as e:
488
- return None, f"❌ Error: {str(e)}", None
489
-
490
- generate_btn.click(
491
- fn=handle_generate,
492
- inputs=[prompt_input, use_llm_checkbox],
493
- outputs=[json, generation_status, pose_data]
494
- )
495
-
496
- def toggle_refinement():
497
- return gr.update(visible=True)
498
 
499
- refine_btn.click(
500
- fn=toggle_refinement,
501
- outputs=[refinement_prompt]
502
- )
 
503
 
504
- def handle_refine(current_json, refinement):
505
- if not current_json or not refinement:
506
- return current_json, "⚠️ Need current pose and refinement instructions", None
507
 
508
- try:
509
- refined = refine_pose_with_llm(current_json, refinement)
510
- return refined, "✅ Pose refined!", js.dumps(refined)
511
- except Exception as e:
512
- return current_json, f"❌ Refinement error: {str(e)}", None
513
 
514
- refinement_prompt.submit(
515
- fn=handle_refine,
516
- inputs=[json, refinement_prompt],
517
- outputs=[json, generation_status, pose_data]
518
- )
 
 
 
 
 
519
 
520
- def save_current_pose():
521
- # This would normally interact with the canvas
522
- return {"message": "Use the canvas save function"}
523
 
524
- saveBtn.click(
525
- fn=save_current_pose,
526
- outputs=[json]
 
527
  )
528
 
529
- jsonSource.change(
530
- fn=lambda x: js.loads(x) if x else {},
531
- inputs=[jsonSource],
532
- outputs=[json]
533
  )
534
 
535
- replaceWithJsonBtn.click(
536
- fn=apply_pose_to_canvas,
537
- inputs=[json],
538
- outputs=[pose_data]
539
  )
540
 
541
- importJsonBtn.click(
542
- fn=apply_pose_to_canvas,
543
- inputs=[json],
544
- outputs=[pose_data]
545
  )
546
-
547
- # Check API key status
548
- def check_api_status():
549
- if FIREWORKS_API_KEY == "YOUR_API_KEY_HERE":
550
- return "⚠️ LLM API key not configured - using templates"
551
- return "✅ LLM ready"
552
-
553
- demo.load(fn=check_api_status, outputs=[llm_status])
554
-
555
- # Mount Gradio app to FastAPI
556
- gr.mount_gradio_app(app, demo, path="/")
557
 
558
- # Main entry point
559
  if __name__ == "__main__":
560
- import uvicorn
561
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
+ import json
 
 
 
4
  import requests
5
  import os
6
  from typing import Dict, List, Tuple
7
+ import base64
8
+ from PIL import Image, ImageDraw
9
+ import io
10
 
11
  # Fireworks AI configuration
12
  FIREWORKS_API_KEY = os.getenv("FIREWORKS_API_KEY", "YOUR_API_KEY_HERE")
 
20
  "LEye": 15, "REar": 16, "LEar": 17
21
  }
22
 
23
# Skeleton connections for drawing: each pair names the two keypoints that are
# joined by one line segment of the stick figure.
POSE_CONNECTIONS = [
    ("Neck", "RShoulder"), ("RShoulder", "RElbow"), ("RElbow", "RWrist"),
    ("Neck", "LShoulder"), ("LShoulder", "LElbow"), ("LElbow", "LWrist"),
    ("Neck", "Nose"), ("Nose", "REye"), ("Nose", "LEye"),
    ("REye", "REar"), ("LEye", "LEar"),
    ("Neck", "RHip"), ("RHip", "RKnee"), ("RKnee", "RAnkle"),
    ("Neck", "LHip"), ("LHip", "LKnee"), ("LKnee", "LAnkle"),
    ("RHip", "LHip"),
]

# Pose templates: display name -> {keypoint name: [x, y]}.
# The display names are also used as dictionary keys elsewhere in the file, so
# they must stay byte-identical. The inner [x, y] lists are mutable and shared
# by every reader of this table; copy them before editing a pose.
POSE_TEMPLATES = {
    "서있기 (Standing)": {
        "Nose": [256, 80], "Neck": [256, 120],
        "RShoulder": [220, 140], "RElbow": [200, 220], "RWrist": [190, 300],
        "LShoulder": [292, 140], "LElbow": [312, 220], "LWrist": [322, 300],
        "RHip": [240, 280], "RKnee": [235, 380], "RAnkle": [230, 480],
        "LHip": [272, 280], "LKnee": [277, 380], "LAnkle": [282, 480],
        "REye": [246, 70], "LEye": [266, 70], "REar": [236, 75], "LEar": [276, 75],
    },
    "앉기 (Sitting)": {
        "Nose": [256, 120], "Neck": [256, 160],
        "RShoulder": [220, 180], "RElbow": [200, 240], "RWrist": [190, 300],
        "LShoulder": [292, 180], "LElbow": [312, 240], "LWrist": [322, 300],
        "RHip": [240, 320], "RKnee": [280, 380], "RAnkle": [320, 400],
        "LHip": [272, 320], "LKnee": [232, 380], "LAnkle": [192, 400],
        "REye": [246, 110], "LEye": [266, 110], "REar": [236, 115], "LEar": [276, 115],
    },
    "달리기 (Running)": {
        "Nose": [256, 80], "Neck": [256, 120],
        "RShoulder": [220, 140], "RElbow": [180, 180], "RWrist": [150, 140],
        "LShoulder": [292, 140], "LElbow": [332, 200], "LWrist": [362, 260],
        "RHip": [240, 280], "RKnee": [260, 380], "RAnkle": [290, 470],
        "LHip": [272, 280], "LKnee": [252, 360], "LAnkle": [222, 440],
        "REye": [246, 70], "LEye": [266, 70], "REar": [236, 75], "LEar": [276, 75],
    },
    "요가 (Yoga)": {
        "Nose": [256, 100], "Neck": [256, 140],
        "RShoulder": [200, 160], "RElbow": [150, 120], "RWrist": [100, 100],
        "LShoulder": [312, 160], "LElbow": [362, 120], "LWrist": [412, 100],
        "RHip": [240, 300], "RKnee": [220, 400], "RAnkle": [200, 480],
        "LHip": [272, 300], "LKnee": [292, 400], "LAnkle": [312, 480],
        "REye": [246, 90], "LEye": [266, 90], "REar": [236, 95], "LEar": [276, 95],
    },
    "춤추기 (Dancing)": {
        "Nose": [256, 80], "Neck": [256, 120],
        "RShoulder": [220, 140], "RElbow": [180, 120], "RWrist": [140, 100],
        "LShoulder": [292, 140], "LElbow": [332, 160], "LWrist": [372, 140],
        "RHip": [240, 280], "RKnee": [260, 380], "RAnkle": [250, 480],
        "LHip": [272, 280], "LKnee": [252, 380], "LAnkle": [262, 480],
        "REye": [246, 70], "LEye": [266, 70], "REar": [236, 75], "LEar": [276, 75],
    },
}
77
 
78
def _point_xy(point):
    """Return ``(x, y)`` as floats for a keypoint entry, or ``None`` if unusable.

    Only the first two elements are read, so entries that carry extra values
    (for example a trailing confidence) are still accepted. ``None``, empty,
    too-short, or non-numeric entries yield ``None``.
    """
    try:
        return float(point[0]), float(point[1])
    except (TypeError, ValueError, IndexError, KeyError):
        return None


def draw_pose(keypoints: Dict, width: int = 512, height: int = 512) -> Image.Image:
    """Draw a stick-figure image from named keypoints.

    Args:
        keypoints: Mapping of keypoint name to an ``[x, y]`` coordinate.
            Entries that cannot be read as two numbers are skipped instead of
            raising, because this mapping may come from parsed LLM output.
        width: Width of the created image.
        height: Height of the created image.

    Returns:
        A new white RGB image with blue limb segments and red joint markers.
    """
    img = Image.new('RGB', (width, height), color='white')
    draw = ImageDraw.Draw(img)

    # Normalise once so both drawing passes agree on which points are usable.
    points = {part: _point_xy(point) for part, point in keypoints.items()}

    # Draw skeleton connections first so the joint markers end up on top.
    for start, end in POSE_CONNECTIONS:
        start_point = points.get(start)
        end_point = points.get(end)
        if start_point is not None and end_point is not None:
            draw.line([start_point, end_point], fill='blue', width=3)

    # Draw keypoints
    radius = 5
    for xy in points.values():
        if xy is None:
            continue
        x, y = xy
        draw.ellipse(
            [x - radius, y - radius, x + radius, y + radius],
            fill='red',
            outline='darkred',
        )

    return img
101
 
102
+ def generate_pose_from_llm(prompt: str) -> Dict:
103
  """
104
+ LLM을 사용하여 텍스트로부터 포즈 생성
105
  """
106
+ system_prompt = """You are an expert in generating human pose keypoints.
107
+ Given a description, generate 18 keypoint coordinates for OpenPose.
108
+
109
+ Canvas size: 512x512 pixels
110
+ Keypoints: Nose, Neck, RShoulder, RElbow, RWrist, LShoulder, LElbow, LWrist,
111
+ RHip, RKnee, RAnkle, LHip, LKnee, LAnkle, REye, LEye, REar, LEar
112
 
113
+ Return ONLY a JSON object with keypoint names and [x, y] coordinates.
114
+ Example: {"Nose": [256, 80], "Neck": [256, 120], ...}
 
 
115
 
116
+ Ensure anatomically correct proportions and center the pose."""
117
 
118
  headers = {
119
  "Accept": "application/json",
 
123
 
124
  payload = {
125
  "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
126
+ "max_tokens": 1024,
127
  "temperature": 0.3,
128
  "messages": [
129
  {"role": "system", "content": system_prompt},
130
+ {"role": "user", "content": f"Generate pose keypoints for: {prompt}"}
131
  ]
132
  }
133
 
 
137
  data = response.json()
138
  content = data['choices'][0]['message']['content']
139
 
140
+ # Extract JSON from response
141
  import re
142
  json_match = re.search(r'\{.*\}', content, re.DOTALL)
143
  if json_match:
144
+ keypoints = json.loads(json_match.group())
145
+ return keypoints
146
  except Exception as e:
147
  print(f"LLM Error: {e}")
148
 
149
+ # Fallback to template
150
+ return get_template_from_prompt(prompt)
151
 
152
def get_template_from_prompt(prompt: str) -> Dict:
    """Select a pose template by looking for keywords in the prompt.

    The prompt is lower-cased and checked against each keyword group in order
    (sitting, running, yoga, dancing); the first group with a keyword that
    occurs as a substring wins. When nothing matches, the standing template is
    returned.

    Args:
        prompt: Free-text pose description. ``None`` or an empty string is
            treated as "no keywords" and yields the standing template.

    Returns:
        The matching entry of ``POSE_TEMPLATES``. This is the shared template
        object itself, not a copy, so callers must not mutate it in place.
    """
    prompt_lower = (prompt or "").lower()

    # NOTE: every keyword must be non-empty. An empty string is a substring of
    # every prompt, which would make its group match unconditionally and leave
    # the standing fallback unreachable.
    keyword_groups = (
        ("앉기 (Sitting)", ("앉", "sit", "chair", "의자")),
        ("달리기 (Running)", ("달리", "run", "jog", "뛰")),
        ("요가 (Yoga)", ("요가", "yoga", "명상", "meditation")),
        ("춤추기 (Dancing)", ("춤", "dance", "댄스")),
    )

    for template_name, keywords in keyword_groups:
        if any(word in prompt_lower for word in keywords):
            return POSE_TEMPLATES[template_name]

    return POSE_TEMPLATES["서있기 (Standing)"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ def refine_pose(current_keypoints: Dict, instruction: str) -> Dict:
170
  """
171
+ 기존 포즈를 지시사항에 따라 수정
172
  """
173
+ if FIREWORKS_API_KEY == "YOUR_API_KEY_HERE":
174
+ # Simple rule-based refinement
175
+ keypoints = current_keypoints.copy()
176
+ instruction_lower = instruction.lower()
177
+
178
+ if "팔" in instruction_lower or "arm" in instruction_lower:
179
+ if "올리" in instruction_lower or "raise" in instruction_lower:
180
+ # Raise arms
181
+ if "RWrist" in keypoints:
182
+ keypoints["RWrist"][1] -= 50
183
+ if "LWrist" in keypoints:
184
+ keypoints["LWrist"][1] -= 50
185
+ elif "내리" in instruction_lower or "lower" in instruction_lower:
186
+ # Lower arms
187
+ if "RWrist" in keypoints:
188
+ keypoints["RWrist"][1] += 50
189
+ if "LWrist" in keypoints:
190
+ keypoints["LWrist"][1] += 50
191
+
192
+ return keypoints
193
+
194
+ # Use LLM for refinement
195
+ system_prompt = """Modify the given pose keypoints based on the instruction.
196
+ Return the modified keypoints in the same JSON format."""
197
 
198
  headers = {
199
  "Accept": "application/json",
 
203
 
204
  payload = {
205
  "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
206
+ "max_tokens": 1024,
207
  "temperature": 0.2,
208
  "messages": [
209
  {"role": "system", "content": system_prompt},
210
+ {"role": "user", "content": f"Current keypoints: {json.dumps(current_keypoints)}\nInstruction: {instruction}"}
211
  ]
212
  }
213
 
 
220
  import re
221
  json_match = re.search(r'\{.*\}', content, re.DOTALL)
222
  if json_match:
223
+ return json.loads(json_match.group())
224
  except Exception as e:
225
  print(f"Refinement error: {e}")
226
 
227
+ return current_keypoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
def keypoints_to_openpose_format(keypoints: Dict) -> str:
    """Convert named keypoints to an OpenPose-style JSON string.

    Args:
        keypoints: Mapping of keypoint name to an ``[x, y]`` coordinate. Names
            are resolved to indices 0-17 through ``BODY_PARTS``. Missing or
            unreadable entries are emitted as absent parts instead of raising.

    Returns:
        A JSON document (indented by 2) with two keys:

        * ``candidate``: 18 rows of ``[x, y, confidence]``; absent parts are
          ``[0.0, 0.0, 0.0]``.
        * ``subset``: a single person row of 20 values - for each of the 18
          parts its index into ``candidate`` or ``-1`` when absent, followed
          by the number of present parts as a float score and as a count.
    """
    # Build the index -> name lookup once; setdefault keeps the first name
    # registered for an index, matching a first-match linear search.
    name_by_index = {}
    for name, idx in BODY_PARTS.items():
        name_by_index.setdefault(idx, name)

    candidate = []
    part_refs = []
    for i in range(18):
        point = keypoints.get(name_by_index.get(i))
        try:
            x, y = float(point[0]), float(point[1])
        except (TypeError, ValueError, IndexError, KeyError):
            candidate.append([0.0, 0.0, 0.0])
            # Keep the row position and mark the part absent so that later
            # parts are not shifted into the wrong slot.
            part_refs.append(-1)
        else:
            candidate.append([x, y, 1.0])
            part_refs.append(i)

    found = sum(1 for ref in part_refs if ref >= 0)
    subset = [part_refs + [float(found), found]]

    return json.dumps({"candidate": candidate, "subset": subset}, indent=2)
 
 
 
 
 
 
 
251
 
252
# Gradio Interface
# NOTE(review): the nesting of layout containers below was reconstructed from a
# view with indentation stripped; confirm it against the rendered UI.
with gr.Blocks(title="AI Pose Generator", theme=gr.themes.Soft()) as demo:
    # Holds the keypoints of the most recently generated or edited pose.
    current_keypoints = gr.State({})

    gr.Markdown("""
    # 🎨 AI 포즈 생성기 (Line Art Pose Generator)
    ### 텍스트 설명으로 정확한 포즈를 생성합니다
    """)

    with gr.Tabs():
        with gr.TabItem("🤖 AI 포즈 생성"):
            with gr.Row():
                with gr.Column(scale=1):
                    # LLM settings
                    use_llm = gr.Checkbox(
                        label="🚀 고급 AI 모델 사용 (Fireworks API)",
                        value=False,
                        info="체크하면 정확한 포즈 생성 (API 키 필요)"
                    )

                    api_status = gr.Markdown("⚠️ API 키 미설정 - 템플릿 모드")

                    # Text input
                    prompt = gr.Textbox(
                        label="포즈 설명",
                        placeholder="예: 의자에 앉아 책을 읽는 사람",
                        lines=3
                    )

                    # Examples
                    gr.Examples(
                        examples=[
                            "팔을 높이 들고 승리의 포즈",
                            "의자에 앉아 노트북 타이핑",
                            "한쪽 다리로 서서 요가 포즈",
                            "양손을 허리에 올린 자신감 있는 포즈",
                            "달리기 자세",
                            "무릎 꿇고 기도하는 자세"
                        ],
                        inputs=prompt
                    )

                    generate_btn = gr.Button("🎯 포즈 생성", variant="primary", size="lg")

                    # Template selection
                    with gr.Accordion("📚 템플릿 선택", open=False):
                        template_select = gr.Dropdown(
                            choices=list(POSE_TEMPLATES.keys()),
                            label="포즈 템플릿",
                            value="서있기 (Standing)"
                        )
                        use_template_btn = gr.Button("템플릿 적용")

                with gr.Column(scale=1):
                    # Pose image output
                    pose_image = gr.Image(
                        label="생성된 포즈",
                        type="pil",
                        height=512
                    )

                    # JSON output
                    with gr.Accordion("📋 OpenPose JSON", open=False):
                        json_output = gr.Code(
                            label="JSON 데이터",
                            language="json",
                            lines=10
                        )

                    # NOTE(review): no event handler is attached to this button
                    # anywhere in the visible code.
                    download_btn = gr.Button("💾 JSON 다운로드", size="sm")

        with gr.TabItem("✏️ 포즈 수정"):
            with gr.Row():
                with gr.Column():
                    refinement_instruction = gr.Textbox(
                        label="수정 지시사항",
                        placeholder="예: 왼쪽 팔을 더 높이 들어주세요",
                        lines=2
                    )

                    refine_btn = gr.Button("✨ 포즈 수정", variant="secondary")

                    # Fine adjustment
                    with gr.Accordion("🎛️ 수동 조정", open=False):
                        selected_part = gr.Dropdown(
                            choices=list(BODY_PARTS.keys()),
                            label="조정할 부위",
                            value="RWrist"
                        )
                        x_adjust = gr.Slider(-50, 50, 0, label="X 조정")
                        y_adjust = gr.Slider(-50, 50, 0, label="Y 조정")
                        apply_adjust_btn = gr.Button("적용")

                with gr.Column():
                    refined_image = gr.Image(
                        label="수정된 포즈",
                        type="pil",
                        height=512
                    )

        with gr.TabItem("ℹ️ 사용법"):
            gr.Markdown("""
            ## 사용 방법

            ### 1. AI 포즈 생성
            - **텍스트 설명**: 원하는 포즈를 자연어로 설명하세요
            - **고급 AI 모델**: Fireworks API 키가 있으면 더 정확한 생성 가능
            - **템플릿**: 빠른 시작을 위한 기본 포즈 제공

            ### 2. 포즈 수정
            - **자연어 수정**: "팔을 올려주세요" 같은 지시로 수정
            - **수동 조정**: 특정 관절을 직접 이동

            ### 3. 내보내기
            - OpenPose 형식 JSON으로 다운로드
            - ControlNet 등에서 사용 가능

            ### API 설정 (선택사항)
            ```bash
            export FIREWORKS_API_KEY="your_api_key"
            ```

            ### 특징
            - 🚀 GPU 불필요 - CPU만으로 작동
            - 🎨 깔끔한 라인 아트 스타일
            - 📊 OpenPose 호환 형식
            - 🔧 쉬운 수정 도구
            """)

    # Event handlers
    def _clone_keypoints(keypoints):
        """Return a copy of a keypoint mapping whose coordinate lists are new.

        A plain ``dict.copy()`` would still share the inner ``[x, y]`` lists,
        so an in-place edit would leak into the source mapping (including the
        module-level pose templates).
        """
        return {
            part: (list(point) if isinstance(point, (list, tuple)) else point)
            for part, point in keypoints.items()
        }

    def check_api_status():
        """Return the status line describing whether an API key is configured."""
        if FIREWORKS_API_KEY != "YOUR_API_KEY_HERE":
            return "✅ API 키 설정됨 - 고급 AI 사용 가능"
        return "⚠️ API 키 미설정 - 템플릿 모드"

    def generate_pose(prompt_text, use_llm_flag):
        """Create a pose from the prompt; returns (image, JSON text, keypoints)."""
        if not prompt_text and not use_llm_flag:
            # Use default template
            keypoints = POSE_TEMPLATES["서있기 (Standing)"]
        elif use_llm_flag and FIREWORKS_API_KEY != "YOUR_API_KEY_HERE":
            keypoints = generate_pose_from_llm(prompt_text)
        else:
            keypoints = get_template_from_prompt(prompt_text)

        # Store a private copy in the session state so later edits cannot
        # modify the shared template tables.
        keypoints = _clone_keypoints(keypoints)

        # Draw pose
        pose_img = draw_pose(keypoints)
        json_str = keypoints_to_openpose_format(keypoints)

        return pose_img, json_str, keypoints

    def use_template(template_name):
        """Load a named template; returns (image, JSON text, keypoints)."""
        keypoints = _clone_keypoints(POSE_TEMPLATES[template_name])
        pose_img = draw_pose(keypoints)
        json_str = keypoints_to_openpose_format(keypoints)
        return pose_img, json_str, keypoints

    def refine_existing_pose(instruction, keypoints_state):
        """Apply a text instruction to the stored pose; returns (image, keypoints)."""
        if not keypoints_state:
            # Two outputs are wired to this handler, so always return a pair.
            return None, keypoints_state

        # refine_pose may edit coordinate lists in place; hand it a copy.
        refined_keypoints = refine_pose(_clone_keypoints(keypoints_state), instruction)
        pose_img = draw_pose(refined_keypoints)
        return pose_img, refined_keypoints

    def manual_adjust(part, x_adj, y_adj, keypoints_state):
        """Shift one keypoint by (x_adj, y_adj); returns (image, keypoints)."""
        # Also covers a part that is present but has no coordinates.
        if not keypoints_state or not keypoints_state.get(part):
            return None, keypoints_state

        adjusted_keypoints = _clone_keypoints(keypoints_state)
        adjusted_keypoints[part][0] += x_adj
        adjusted_keypoints[part][1] += y_adj

        pose_img = draw_pose(adjusted_keypoints)
        return pose_img, adjusted_keypoints

    # Connect events
    demo.load(check_api_status, outputs=api_status)

    generate_btn.click(
        generate_pose,
        inputs=[prompt, use_llm],
        outputs=[pose_image, json_output, current_keypoints]
    )

    use_template_btn.click(
        use_template,
        inputs=[template_select],
        outputs=[pose_image, json_output, current_keypoints]
    )

    refine_btn.click(
        refine_existing_pose,
        inputs=[refinement_instruction, current_keypoints],
        outputs=[refined_image, current_keypoints]
    )

    apply_adjust_btn.click(
        manual_adjust,
        inputs=[selected_part, x_adjust, y_adjust, current_keypoints],
        outputs=[refined_image, current_keypoints]
    )
 
 
 
 
 
 
 
 
 
 
 
453
 
454
# Launch
# Runs only when the file is executed as a script: serves the Gradio app on
# port 7860 on all network interfaces, without a public share link.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=False
    )