srivatsavdamaraju committed on
Commit
8554fb0
·
verified ·
1 Parent(s): 4ccef7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -228
app.py CHANGED
@@ -1,230 +1,93 @@
1
  import gradio as gr
2
- import cv2
3
- import os
4
- import uuid
5
- import threading
6
- import time
7
  import mediapipe as mp
8
- import pandas as pd
9
- from concurrent.futures import ThreadPoolExecutor
10
- import queue
11
-
12
- # === Setup ===
13
- OUTPUT_DIR = "captured_frames"
14
- os.makedirs(OUTPUT_DIR, exist_ok=True)
15
- df = pd.DataFrame(columns=["filename", "caption", "pose_coords"])
16
-
17
- pose = mp.solutions.pose.Pose()
18
-
19
- state = {
20
- "cap": None,
21
- "frame": None,
22
- "frame_rgb": None, # Pre-converted RGB frame
23
- "play": False,
24
- "video_path": None,
25
- "capture_queue": queue.Queue(),
26
- "processing_thread": None
27
- }
28
-
29
- # Thread pool for background processing
30
- executor = ThreadPoolExecutor(max_workers=2)
31
-
32
- # === Background pose processing ===
33
- def process_pose_async(frame_bgr, filename, caption):
34
- """Process pose estimation in background thread"""
35
- try:
36
- # Convert to RGB for MediaPipe
37
- frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
38
- results = pose.process(frame_rgb)
39
-
40
- coords = []
41
- if results.pose_landmarks:
42
- for lm in results.pose_landmarks.landmark:
43
- coords.append((round(lm.x, 5), round(lm.y, 5), round(lm.z, 5)))
44
-
45
- # Add to dataframe
46
- global df
47
- new_row = pd.DataFrame([{
48
- "filename": filename,
49
- "caption": caption,
50
- "pose_coords": coords
51
- }])
52
- df = pd.concat([df, new_row], ignore_index=True)
53
-
54
- except Exception as e:
55
- print(f"Error processing pose: {e}")
56
-
57
- # === Load Video ===
58
- def load_video(video_file):
59
- try:
60
- if hasattr(video_file, "name"):
61
- video_path = video_file.name
62
- else:
63
- video_path = video_file
64
- state["video_path"] = video_path
65
-
66
- if state["cap"]:
67
- state["cap"].release()
68
-
69
- state["cap"] = cv2.VideoCapture(video_path)
70
-
71
- # Set buffer size to reduce lag
72
- state["cap"].set(cv2.CAP_PROP_BUFFERSIZE, 1)
73
-
74
- state["frame"] = None
75
- state["frame_rgb"] = None
76
- state["play"] = False
77
- return "✅ Video loaded successfully!"
78
- except Exception as e:
79
- return f"❌ Error loading video: {e}"
80
-
81
- # === Play video in background ===
82
- def play_video():
83
- if not state["cap"]:
84
- return "⚠️ Load a video first."
85
- state["play"] = True
86
-
87
- def stream():
88
- while state["cap"] and state["cap"].isOpened() and state["play"]:
89
- ret, frame = state["cap"].read()
90
- if not ret:
91
- state["play"] = False
92
- break
93
-
94
- # Store both BGR and RGB versions
95
- state["frame"] = frame
96
- state["frame_rgb"] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
97
-
98
- # Adaptive delay based on video FPS
99
- fps = state["cap"].get(cv2.CAP_PROP_FPS)
100
- if fps > 0:
101
- time.sleep(1.0 / fps)
102
- else:
103
- time.sleep(0.033) # ~30 FPS fallback
104
-
105
- threading.Thread(target=stream, daemon=True).start()
106
- return "▶️ Playing..."
107
-
108
- # === Pause playback ===
109
- def pause_video():
110
- state["play"] = False
111
- return "⏸️ Paused."
112
-
113
- # === Show current frame ===
114
- def show_frame():
115
- if state["frame_rgb"] is not None:
116
- return state["frame_rgb"] # Already in RGB
117
- return None
118
-
119
- # === Fast capture frame (immediate pause + async processing) ===
120
- def capture_frame(caption):
121
- if state["frame"] is None:
122
- return "⚠️ No frame to capture.", None
123
-
124
- # IMMEDIATE pause - this is the key optimization
125
- state["play"] = False
126
-
127
- # Capture current frame immediately
128
- frame_bgr = state["frame"].copy() # Copy to avoid race conditions
129
- frame_rgb = state["frame_rgb"].copy() if state["frame_rgb"] is not None else cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
130
-
131
- # Generate filename and save immediately
132
- filename = f"{uuid.uuid4().hex[:8]}.jpg"
133
- path = os.path.join(OUTPUT_DIR, filename)
134
- cv2.imwrite(path, frame_bgr)
135
-
136
- # Process pose estimation in background (non-blocking)
137
- executor.submit(process_pose_async, frame_bgr, filename, caption)
138
-
139
- return f"✅ Captured & paused: {filename} (processing pose...)", frame_rgb
140
-
141
- # === Show dataset info ===
142
- def show_dataset_info():
143
- return f"📊 Dataset contains {len(df)} samples"
144
-
145
- # === Download CSV ===
146
- def download_csv():
147
- path = os.path.join(OUTPUT_DIR, "pose_dataset.csv")
148
- df.to_csv(path, index=False)
149
- return path
150
-
151
- # === Reset all ===
152
- def reset_all():
153
- global df
154
- df = pd.DataFrame(columns=["filename", "caption", "pose_coords"])
155
-
156
- # Clean up files
157
- try:
158
- for f in os.listdir(OUTPUT_DIR):
159
- file_path = os.path.join(OUTPUT_DIR, f)
160
- if os.path.isfile(file_path):
161
- os.remove(file_path)
162
- except Exception as e:
163
- print(f"Error cleaning files: {e}")
164
-
165
- # Reset video state
166
- if state["cap"]:
167
- state["cap"].release()
168
- state.update({
169
- "video_path": None,
170
- "cap": None,
171
- "frame": None,
172
- "frame_rgb": None,
173
- "play": False
174
- })
175
- return "🔁 Reset done.", None
176
-
177
- # === UI ===
178
- with gr.Blocks(title="Fast Archery Pose Capture") as app:
179
- gr.Markdown("## 🏹 Archery Pose Dataset Tool (Optimized for Speed)")
180
- gr.Markdown("⚡ **Optimized**: Instant capture with background pose processing")
181
-
182
- # Top section - Video loading
183
- video_input = gr.Video(label="🎞️ Upload Video")
184
- load_btn = gr.Button("📂 Load Video", variant="primary")
185
- status = gr.Textbox(label="Status", interactive=False)
186
-
187
- # Main section - Side by side layout
188
- with gr.Row():
189
- # Left column - Video display and controls
190
- with gr.Column(scale=1):
191
- gr.Markdown("### 🎥 Video Player")
192
- with gr.Row():
193
- play_btn = gr.Button("▶️ Play", variant="secondary")
194
- pause_btn = gr.Button("⏸️ Pause", variant="secondary")
195
- show_btn = gr.Button("🖼️ Show Frame", variant="secondary")
196
-
197
- image_output = gr.Image(label="Current Frame", height=400)
198
-
199
- # Right column - Capture controls
200
- with gr.Column(scale=1):
201
- gr.Markdown("### 📸 Capture Controls")
202
- caption_input = gr.Textbox(label="Caption", placeholder="Describe the pose...", lines=2)
203
- capture_btn = gr.Button("📸 Capture & Pause", variant="primary", size="lg")
204
-
205
- gr.Markdown("### 📊 Dataset Management")
206
- with gr.Row():
207
- info_btn = gr.Button("📊 Dataset Info")
208
- download_btn = gr.Button("📥 Download CSV")
209
-
210
- reset_btn = gr.Button("🔄 Reset All", variant="stop")
211
- dataset_info = gr.Textbox(label="Dataset Info", interactive=False, lines=2)
212
-
213
- # Bottom section - File download
214
- csv_file = gr.File(label="📄 Dataset CSV")
215
-
216
- # Bind actions
217
- load_btn.click(load_video, inputs=video_input, outputs=status)
218
- play_btn.click(play_video, outputs=status)
219
- pause_btn.click(pause_video, outputs=status)
220
- show_btn.click(show_frame, outputs=image_output)
221
- capture_btn.click(capture_frame, inputs=caption_input, outputs=[status, image_output])
222
- info_btn.click(show_dataset_info, outputs=dataset_info)
223
- download_btn.click(download_csv, outputs=csv_file)
224
- reset_btn.click(reset_all, outputs=[status, image_output])
225
-
226
- # Auto-refresh frame display while playing
227
- app.load(lambda: None) # Initialize
228
-
229
- if __name__ == "__main__":
230
- app.launch(share=False, server_name="0.0.0.0", server_port=7860)
 
# --- Dependencies (stdlib first, then third-party) ---
import base64
import tempfile

import requests
import cv2
import numpy as np
import gradio as gr
import mediapipe as mp
from openai import OpenAI

# One shared MediaPipe Pose detector for the whole app.
# static_image_mode=True runs full detection on every image rather than
# tracking across frames -- appropriate for single uploaded photos.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)
# Function to extract pose landmarks
def extract_pose(image):
    """Run MediaPipe Pose on *image* and return its landmarks.

    Args:
        image: H x W x 3 uint8 numpy array. Gradio's ``gr.Image(type="numpy")``
            delivers RGB, which is exactly what MediaPipe expects, so the
            frame is passed through unchanged. (The previous BGR2RGB call
            was swapping an already-RGB frame into BGR.)

    Returns:
        tuple: ``(pose_data, image)`` where ``pose_data`` is a list of
        ``{"id", "x", "y", "z", "visibility"}`` dicts (normalized
        coordinates), or an error string when no pose is detected.
        The input image is always returned unmodified as the second element.
    """
    results = pose.process(image)

    # MediaPipe returns None landmarks when no person is found; callers
    # distinguish failure by checking for a str result (see process()).
    if not results.pose_landmarks:
        return "No pose landmarks found.", image

    pose_data = [
        {
            "id": i,
            "x": lm.x,
            "y": lm.y,
            "z": lm.z,
            "visibility": lm.visibility,
        }
        for i, lm in enumerate(results.pose_landmarks.landmark)
    ]
    return pose_data, image
33
+
# Function to convert image to base64
def image_to_base64(img_np):
    """Encode an RGB numpy image as a base64 JPEG string.

    Gradio supplies frames in RGB channel order while OpenCV's ``imencode``
    assumes BGR, so the channels are swapped before encoding to keep the
    JPEG colors correct for the vision model.

    Args:
        img_np: H x W x 3 uint8 numpy array in RGB order.

    Returns:
        str: base64-encoded JPEG bytes (no data-URI prefix).

    Raises:
        ValueError: if OpenCV fails to encode the image.
    """
    ok, buffer = cv2.imencode('.jpg', cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR))
    if not ok:
        # The original code ignored this flag and would base64-encode garbage.
        raise ValueError("Failed to JPEG-encode image")
    return base64.b64encode(buffer).decode('utf-8')
38
+
# Call Vision LLM
def call_llama_vlm(image, pose_data):
    """Describe the pose in *image* using a vision LLM via OpenRouter.

    The image is sent inline as a base64 data URI together with the raw
    MediaPipe landmark data, and the model's free-text answer is returned.

    Args:
        image: H x W x 3 uint8 numpy array in RGB order (forwarded to
            ``image_to_base64``, which handles the BGR swap for OpenCV).
        pose_data: list of landmark dicts from ``extract_pose``.

    Returns:
        str: the model's description of the pose.
    """
    import os  # local import so this edit is self-contained

    img_base64 = image_to_base64(image)

    # Never hard-code secrets: the key comes from the environment.
    # (The previous version shipped a literal "<OPENROUTER_API_KEY>" placeholder.)
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ.get("OPENROUTER_API_KEY", ""),
    )

    # Optional OpenRouter attribution headers -- only sent when configured,
    # instead of leaking "<YOUR_SITE_URL>" placeholders to the API.
    extra_headers = {}
    if os.environ.get("OPENROUTER_SITE_URL"):
        extra_headers["HTTP-Referer"] = os.environ["OPENROUTER_SITE_URL"]
    if os.environ.get("OPENROUTER_SITE_NAME"):
        extra_headers["X-Title"] = os.environ["OPENROUTER_SITE_NAME"]

    completion = client.chat.completions.create(
        extra_headers=extra_headers,
        model="meta-llama/llama-3.2-11b-vision-instruct:free",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"What is this pose doing? Pose data: {pose_data}",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{img_base64}"
                        },
                    },
                ],
            }
        ],
    )

    return completion.choices[0].message.content
76
+
# Gradio Interface
def process(image):
    """Gradio handler: detect a pose in *image* and describe it with the VLM.

    Returns either the model's description or, when no pose is found,
    the error message produced by extract_pose.
    """
    landmarks, frame = extract_pose(image)
    # extract_pose signals failure by returning an error string in place
    # of the landmark list -- pass that message straight to the UI.
    if isinstance(landmarks, str):
        return landmarks
    return call_llama_vlm(frame, landmarks)
85
+
# Build and launch the single-page Gradio UI: one image in, one text answer out.
interface = gr.Interface(
    fn=process,
    inputs=gr.Image(type="numpy", label="Upload Pose Image"),
    outputs="text",
    title="Pose Analysis with MediaPipe and Vision LLM",
)

interface.launch()