srivatsavdamaraju committed on
Commit
e4a4ccb
·
verified ·
1 Parent(s): 99a25f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -76
app.py CHANGED
@@ -1,93 +1,100 @@
 
 
 
1
  import gradio as gr
2
- import mediapipe as mp
3
  import cv2
4
  import numpy as np
5
- from openai import OpenAI
6
- import base64
7
- import tempfile
8
- import requests
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
# MediaPipe Pose, created once at import time (static mode: each input
# is treated as an independent photograph, not a video frame).
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)

def extract_pose(image):
    """Run MediaPipe Pose on a BGR image.

    Returns a ``(pose_data, image)`` tuple: ``pose_data`` is a list of
    landmark dicts (id, x, y, z, visibility) on success, or the error
    string ``"No pose landmarks found."`` when detection fails.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    detection = pose.process(rgb)

    # Guard clause: signal failure via an error string, matching callers
    # that check isinstance(pose_data, str).
    if not detection.pose_landmarks:
        return "No pose landmarks found.", image

    landmarks = []
    for index, point in enumerate(detection.pose_landmarks.landmark):
        landmarks.append({
            "id": index,
            "x": point.x,
            "y": point.y,
            "z": point.z,
            "visibility": point.visibility,
        })
    return landmarks, image
33
-
34
def image_to_base64(img_np):
    """Return *img_np* JPEG-compressed and encoded as a UTF-8 base64 string."""
    jpeg = cv2.imencode('.jpg', img_np)[1]
    return base64.b64encode(jpeg).decode('utf-8')
38
-
39
# Call Vision LLM
def call_llama_vlm(image, pose_data):
    """Ask a vision LLM (via OpenRouter) to describe the pose in *image*.

    Parameters
    ----------
    image : numpy.ndarray
        BGR image; it is embedded in the request as a base64 data URL.
    pose_data : list
        MediaPipe landmark dicts, interpolated into the text prompt.

    Returns
    -------
    str
        The model's textual description of the pose.
    """
    import os  # local import: this module does not import os at top level

    img_base64 = image_to_base64(image)

    # Read the key from the environment instead of hard-coding a secret in
    # source code; fall back to the old placeholder so behavior is
    # unchanged when the variable is unset.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ.get("OPENROUTER_API_KEY", "<OPENROUTER_API_KEY>"),
    )

    completion = client.chat.completions.create(
        extra_headers={
            "HTTP-Referer": "<YOUR_SITE_URL>",
            "X-Title": "<YOUR_SITE_NAME>",
        },
        model="meta-llama/llama-3.2-11b-vision-instruct:free",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"What is this pose doing? Pose data: {pose_data}"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{img_base64}"
                        }
                    }
                ]
            }
        ]
    )

    return completion.choices[0].message.content
 
 
76
 
77
# Gradio Interface
def process(image):
    """Gradio entry point: detect a pose, then describe it with the VLM."""
    pose_data, annotated = extract_pose(image)
    # extract_pose signals failure by returning an error string instead
    # of a landmark list; pass that message straight through to the UI.
    if isinstance(pose_data, str):
        return pose_data
    return call_llama_vlm(annotated, pose_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
 
86
# UI wiring: one uploaded image in, one text description out.
pose_input = gr.Image(type="numpy", label="Upload Pose Image")
interface = gr.Interface(
    fn=process,
    inputs=pose_input,
    outputs="text",
    title="Pose Analysis with MediaPipe and Vision LLM"
)

interface.launch()
 
1
# Install required packages before running. NOTE: the IPython magic
# "!pip install ..." is a SyntaxError in a plain .py script, so the
# install step must live in a shell / requirements file instead:
#   pip install gradio opencv-python mediapipe
4
  import gradio as gr
 
5
  import cv2
6
  import numpy as np
7
+ from PIL import Image
8
+ import mediapipe as mp
9
+ import os
10
+ import json
11
+ from datetime import datetime
12
+
13
# --- One-time module setup ---------------------------------------------
# Folder for saved overlay images and the JSON file that accumulates poses.
os.makedirs("pose_images", exist_ok=True)
json_path = "pose_dataset.json"

# Resume the dataset saved by a previous run, if any.
pose_dataset = {}
if os.path.exists(json_path):
    with open(json_path, "r") as f:
        pose_dataset = json.load(f)

# MediaPipe pose machinery, created once and reused for every request.
mp_pose = mp.solutions.pose
pose_model = mp_pose.Pose(static_image_mode=True, model_complexity=2)
mp_drawing = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles
29
+
30
# 🔧 Process function
def process(image):
    """Detect a pose in *image*, save an annotated overlay plus landmark
    JSON, and return a short text preview.

    Parameters
    ----------
    image : numpy.ndarray | None
        RGB image from the Gradio component (``type="numpy"``), or
        ``None`` when nothing was uploaded.

    Returns
    -------
    str
        Human-readable status / preview message.
    """
    if image is None:
        return "❌ Please upload an image."

    # Timestamp-based ID keeps every saved pose unique.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    pose_id = f"pose_{ts}"

    # MediaPipe expects RGB, which is exactly what Gradio supplies, so run
    # detection on the input directly. (The previous code converted
    # RGB -> BGR and then straight back to RGB — a wasted round trip.)
    results = pose_model.process(image)
    if not results.pose_landmarks:
        return "❌ No pose detected."

    # Convert to BGR once, only for OpenCV drawing and saving.
    img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Draw the skeleton on a copy so img_bgr stays pristine.
    overlay = img_bgr.copy()
    mp_drawing.draw_landmarks(
        overlay,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_styles.get_default_pose_landmarks_style()
    )

    # Save overlay image
    overlay_path = f"pose_images/{pose_id}.png"
    cv2.imwrite(overlay_path, overlay)

    # Extract normalized landmark coordinates, keyed by landmark name.
    pose_coords = {}
    for idx, lm in enumerate(results.pose_landmarks.landmark):
        name = mp_pose.PoseLandmark(idx).name
        pose_coords[name] = {
            "x": round(lm.x, 4),
            "y": round(lm.y, 4),
            "z": round(lm.z, 4),
            "visibility": round(lm.visibility, 3)
        }

    # Append this pose to the on-disk dataset. Keys are kept exactly as
    # before for compatibility with any existing pose_dataset.json.
    pose_dataset[pose_id] = {
        "correct pose name": pose_id,
        "Image with pose overlay": overlay_path,
        "Pose coordinates": pose_coords,
        "Pose description": "To be filled"
    }

    with open(json_path, "w") as f:
        json.dump(pose_dataset, f, indent=2)

    # Return basic preview
    preview = f"✅ Pose saved as `{pose_id}` with {len(pose_coords)} joints.\n"
    for joint, v in list(pose_coords.items())[:5]:  # show first 5
        preview += f"{joint}: x={v['x']} y={v['y']} z={v['z']}\n"

    preview += f"\n🖼 Overlay image saved at `{overlay_path}`"

    return preview
90
 
91
# ✅ Interface-style Gradio UI
image_input = gr.Image(type="numpy", label="Upload Pose Image")
interface = gr.Interface(
    fn=process,
    inputs=image_input,
    outputs="text",
    title="🧘 Pose Analysis with MediaPipe",
    description="Upload a yoga or archery pose image. This tool will extract pose keypoints using MediaPipe and save them in a JSON file."
)

interface.launch()