srivatsavdamaraju committed on
Commit
e4a4ccb
·
verified ·
1 Parent(s): 99a25f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -76
app.py CHANGED
@@ -1,93 +1,100 @@
 
 
 
1
  import gradio as gr
2
- import mediapipe as mp
3
  import cv2
4
  import numpy as np
5
- from openai import OpenAI
6
- import base64
7
- import tempfile
8
- import requests
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
# MediaPipe Pose, created once at import time (static mode: each input
# is treated as an independent photograph, not a video frame).
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)

def extract_pose(image):
    """Run MediaPipe Pose on a BGR image.

    Returns a ``(pose_data, image)`` tuple: ``pose_data`` is a list of
    landmark dicts (id, x, y, z, visibility) on success, or the error
    string ``"No pose landmarks found."`` when detection fails.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detection = pose.process(rgb)

    # Guard clause: signal failure via an error string, matching callers
    # that check isinstance(pose_data, str).
    if not detection.pose_landmarks:
        return "No pose landmarks found.", image

    landmarks = []
    for index, point in enumerate(detection.pose_landmarks.landmark):
        landmarks.append({
            "id": index,
            "x": point.x,
            "y": point.y,
            "z": point.z,
            "visibility": point.visibility,
        })
    return landmarks, image
33
-
34
def image_to_base64(img_np):
    """Return *img_np* JPEG-compressed and encoded as a UTF-8 base64 string."""
    jpeg = cv2.imencode('.jpg', img_np)[1]
    return base64.b64encode(jpeg).decode('utf-8')
38
-
39
# Call Vision LLM
def call_llama_vlm(image, pose_data):
    """Ask a vision LLM (via OpenRouter) to describe the pose in *image*.

    Parameters
    ----------
    image : numpy.ndarray
        BGR image; it is embedded in the request as a base64 data URL.
    pose_data : list
        MediaPipe landmark dicts, interpolated into the text prompt.

    Returns
    -------
    str
        The model's textual description of the pose.
    """
    import os  # local import: this module does not import os at top level

    img_base64 = image_to_base64(image)

    # Read the key from the environment instead of hard-coding a secret in
    # source code; fall back to the old placeholder so behavior is
    # unchanged when the variable is unset.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ.get("OPENROUTER_API_KEY", "<OPENROUTER_API_KEY>"),
    )

    completion = client.chat.completions.create(
        extra_headers={
            "HTTP-Referer": "<YOUR_SITE_URL>",
            "X-Title": "<YOUR_SITE_NAME>",
        },
        model="meta-llama/llama-3.2-11b-vision-instruct:free",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"What is this pose doing? Pose data: {pose_data}"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{img_base64}"
                        }
                    }
                ]
            }
        ]
    )

    return completion.choices[0].message.content
 
 
76
 
77
# Gradio Interface
def process(image):
    """Gradio entry point: detect a pose, then describe it with the VLM."""
    pose_data, annotated = extract_pose(image)
    # extract_pose signals failure by returning an error string instead
    # of a landmark list; pass that message straight through to the UI.
    if isinstance(pose_data, str):
        return pose_data
    return call_llama_vlm(annotated, pose_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
 
86
# UI wiring: one uploaded image in, one text description out.
pose_input = gr.Image(type="numpy", label="Upload Pose Image")
interface = gr.Interface(
    fn=process,
    inputs=pose_input,
    outputs="text",
    title="Pose Analysis with MediaPipe and Vision LLM"
)

interface.launch()
 
1
# Install required packages before running. NOTE: the IPython magic
# "!pip install ..." is a SyntaxError in a plain .py script, so the
# install step must live in a shell / requirements file instead:
#   pip install gradio opencv-python mediapipe
4
  import gradio as gr
 
5
  import cv2
6
  import numpy as np
7
+ from PIL import Image
8
+ import mediapipe as mp
9
+ import os
10
+ import json
11
+ from datetime import datetime
12
+
13
# --- One-time module setup ---------------------------------------------
# Folder for saved overlay images and the JSON file that accumulates poses.
os.makedirs("pose_images", exist_ok=True)
json_path = "pose_dataset.json"

# Resume the dataset saved by a previous run, if any.
pose_dataset = {}
if os.path.exists(json_path):
    with open(json_path, "r") as f:
        pose_dataset = json.load(f)

# MediaPipe pose machinery, created once and reused for every request.
mp_pose = mp.solutions.pose
pose_model = mp_pose.Pose(static_image_mode=True, model_complexity=2)
mp_drawing = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles
29
+
30
# 🔧 Process function
def process(image):
    """Detect a pose in *image*, save an annotated overlay plus landmark
    JSON, and return a short text preview.

    Parameters
    ----------
    image : numpy.ndarray | None
        RGB image from the Gradio component (``type="numpy"``), or
        ``None`` when nothing was uploaded.

    Returns
    -------
    str
        Human-readable status / preview message.
    """
    if image is None:
        return "❌ Please upload an image."

    # Timestamp-based ID keeps every saved pose unique.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    pose_id = f"pose_{ts}"

    # MediaPipe expects RGB, which is exactly what Gradio supplies, so run
    # detection on the input directly. (The previous code converted
    # RGB -> BGR and then straight back to RGB — a wasted round trip.)
    results = pose_model.process(image)
    if not results.pose_landmarks:
        return "❌ No pose detected."

    # Convert to BGR once, only for OpenCV drawing and saving.
    img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Draw the skeleton on a copy so img_bgr stays pristine.
    overlay = img_bgr.copy()
    mp_drawing.draw_landmarks(
        overlay,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_styles.get_default_pose_landmarks_style()
    )

    # Save overlay image
    overlay_path = f"pose_images/{pose_id}.png"
    cv2.imwrite(overlay_path, overlay)

    # Extract normalized landmark coordinates, keyed by landmark name.
    pose_coords = {}
    for idx, lm in enumerate(results.pose_landmarks.landmark):
        name = mp_pose.PoseLandmark(idx).name
        pose_coords[name] = {
            "x": round(lm.x, 4),
            "y": round(lm.y, 4),
            "z": round(lm.z, 4),
            "visibility": round(lm.visibility, 3)
        }

    # Append this pose to the on-disk dataset. Keys are kept exactly as
    # before for compatibility with any existing pose_dataset.json.
    pose_dataset[pose_id] = {
        "correct pose name": pose_id,
        "Image with pose overlay": overlay_path,
        "Pose coordinates": pose_coords,
        "Pose description": "To be filled"
    }

    with open(json_path, "w") as f:
        json.dump(pose_dataset, f, indent=2)

    # Return basic preview
    preview = f"✅ Pose saved as `{pose_id}` with {len(pose_coords)} joints.\n"
    for joint, v in list(pose_coords.items())[:5]:  # show first 5
        preview += f"{joint}: x={v['x']} y={v['y']} z={v['z']}\n"

    preview += f"\n🖼 Overlay image saved at `{overlay_path}`"

    return preview
90
 
91
# ✅ Interface-style Gradio UI
image_input = gr.Image(type="numpy", label="Upload Pose Image")
interface = gr.Interface(
    fn=process,
    inputs=image_input,
    outputs="text",
    title="🧘 Pose Analysis with MediaPipe",
    description="Upload a yoga or archery pose image. This tool will extract pose keypoints using MediaPipe and save them in a JSON file."
)

interface.launch()