Georg Claude Sonnet 4.5 committed on
Commit
2df2c23
·
1 Parent(s): 837e5f7

Convert to Docker Space for FoundationPose C++ support

Browse files

- Change SDK from gradio to docker in README.md
- Add Dockerfile with CUDA 11.8 and FoundationPose build
- Create estimator.py wrapper for FoundationPose
- Remove ZeroGPU (spaces) dependency - Docker uses persistent GPU
- Remove @spaces.GPU decorators from app.py
- Clean up unused files (app_simple.py, deploy.sh, test scripts)
- Keep Gradio UI for web interface

Docker Spaces provide persistent GPU and build environment needed
for FoundationPose's C++ extensions. Gradio still works for UI.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (10) hide show
  1. Dockerfile +77 -0
  2. README.md +2 -2
  3. app.py +2 -46
  4. app_simple.py +0 -290
  5. deploy.sh +0 -108
  6. download_weights.py +0 -196
  7. estimator.py +47 -338
  8. requirements.txt +0 -1
  9. test_local.py +0 -264
  10. verify_weights.py +0 -138
Dockerfile ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
2
+
3
+ # Set environment variables
4
+ ENV DEBIAN_FRONTEND=noninteractive
5
+ ENV CUDA_HOME=/usr/local/cuda
6
+ ENV PATH=${CUDA_HOME}/bin:${PATH}
7
+ ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
8
+
9
+ # FoundationPose configuration
10
+ ENV FOUNDATIONPOSE_MODEL_REPO=gpue/foundationpose-weights
11
+ ENV USE_HF_WEIGHTS=true
12
+ ENV USE_REAL_MODEL=false
13
+
14
+ # Install system dependencies
15
+ RUN apt-get update && apt-get install -y \
16
+ git \
17
+ wget \
18
+ python3.10 \
19
+ python3.10-dev \
20
+ python3-pip \
21
+ libgl1-mesa-glx \
22
+ libglib2.0-0 \
23
+ libsm6 \
24
+ libxext6 \
25
+ libxrender-dev \
26
+ libgomp1 \
27
+ && rm -rf /var/lib/apt/lists/*
28
+
29
+ # Set python3.10 as default
30
+ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
31
+ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1
32
+
33
+ # Upgrade pip
34
+ RUN python3 -m pip install --upgrade pip
35
+
36
+ # Set working directory
37
+ WORKDIR /app
38
+
39
+ # Install Python dependencies first (for better Docker layer caching)
40
+ COPY requirements.txt .
41
+ RUN pip install --no-cache-dir -r requirements.txt
42
+
43
+ # Clone FoundationPose repository
44
+ RUN git clone https://github.com/NVlabs/FoundationPose.git /app/FoundationPose
45
+
46
+ # Build FoundationPose C++ extensions
47
+ WORKDIR /app/FoundationPose
48
+ RUN bash build_all.sh || echo "⚠️ Build completed with warnings"
49
+
50
+ # Copy application files
51
+ WORKDIR /app
52
+ COPY app.py client.py ./
53
+
54
+ # Create weights directory
55
+ RUN mkdir -p weights
56
+
57
+ # Download weights if USE_HF_WEIGHTS=true (optional at build time)
58
+ # Weights can also be downloaded at runtime
59
+ RUN python3 -c "import os; \
60
+ from pathlib import Path; \
61
+ from huggingface_hub import snapshot_download; \
62
+ repo = os.environ.get('FOUNDATIONPOSE_MODEL_REPO', 'gpue/foundationpose-weights'); \
63
+ token = os.environ.get('HF_TOKEN'); \
64
+ use_hf = os.environ.get('USE_HF_WEIGHTS', 'false').lower() == 'true'; \
65
+ use_real = os.environ.get('USE_REAL_MODEL', 'false').lower() == 'true'; \
66
+ if use_hf and use_real: \
67
+ print(f'Downloading weights from {repo}...'); \
68
+ snapshot_download(repo_id=repo, local_dir='weights', token=token, repo_type='model'); \
69
+ print('✓ Weights downloaded'); \
70
+ else: \
71
+ print('Placeholder mode - skipping weights')" || echo "⚠️ Weight download skipped"
72
+
73
+ # Expose Gradio port
74
+ EXPOSE 7860
75
+
76
+ # Run the application
77
+ CMD ["python3", "app.py"]
README.md CHANGED
@@ -3,8 +3,8 @@ title: FoundationPose Inference
3
  emoji: 🎯
4
  colorFrom: blue
5
  colorTo: purple
6
- sdk: gradio
7
- app_file: app.py
8
  pinned: false
9
  tags:
10
  - computer-vision
 
3
  emoji: 🎯
4
  colorFrom: blue
5
  colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  tags:
10
  - computer-vision
app.py CHANGED
@@ -14,7 +14,6 @@ from typing import Dict, List
14
  import cv2
15
  import gradio as gr
16
  import numpy as np
17
- import spaces
18
  import torch
19
 
20
  logging.basicConfig(
@@ -26,48 +25,7 @@ logger = logging.getLogger(__name__)
26
  # Check if running in real FoundationPose mode or placeholder mode
27
  USE_REAL_MODEL = os.environ.get("USE_REAL_MODEL", "false").lower() == "true"
28
 
29
-
30
- def download_weights_if_needed():
31
- """Download model weights from HuggingFace model repository if needed."""
32
- weights_dir = Path("weights")
33
-
34
- # Check if weights already exist
35
- if weights_dir.exists() and any(weights_dir.glob("**/*.pth")):
36
- logger.info("Model weights already exist locally")
37
- return True
38
-
39
- # Only download if USE_REAL_MODEL is enabled
40
- if not USE_REAL_MODEL:
41
- logger.info("Placeholder mode - skipping weight download")
42
- return False
43
-
44
- try:
45
- from huggingface_hub import snapshot_download
46
-
47
- model_repo = os.environ.get("FOUNDATIONPOSE_MODEL_REPO", "gpue/foundationpose-weights")
48
- hf_token = os.environ.get("HF_TOKEN")
49
-
50
- logger.info(f"Downloading model weights from {model_repo}...")
51
-
52
- snapshot_download(
53
- repo_id=model_repo,
54
- local_dir=str(weights_dir),
55
- token=hf_token,
56
- repo_type="model"
57
- )
58
-
59
- logger.info("✓ Model weights downloaded successfully")
60
- return True
61
-
62
- except Exception as e:
63
- logger.error(f"Failed to download weights: {e}")
64
- logger.warning("Falling back to placeholder mode")
65
- return False
66
-
67
-
68
- # Download weights at startup if needed
69
- logger.info("Checking model weights...")
70
- download_weights_if_needed()
71
 
72
 
73
  class FoundationPoseInference:
@@ -208,8 +166,7 @@ class FoundationPoseInference:
208
  pose_estimator = FoundationPoseInference()
209
 
210
 
211
- # Gradio wrapper functions with @spaces.GPU decorators
212
- @spaces.GPU(duration=120)
213
  def gradio_initialize(object_id: str, reference_files: List, fx: float, fy: float, cx: float, cy: float):
214
  """Gradio wrapper for object initialization."""
215
  try:
@@ -253,7 +210,6 @@ def gradio_initialize(object_id: str, reference_files: List, fx: float, fy: floa
253
  return f"Error: {str(e)}"
254
 
255
 
256
- @spaces.GPU(duration=30)
257
  def gradio_estimate(object_id: str, query_image: np.ndarray, fx: float, fy: float, cx: float, cy: float):
258
  """Gradio wrapper for pose estimation."""
259
  try:
 
14
  import cv2
15
  import gradio as gr
16
  import numpy as np
 
17
  import torch
18
 
19
  logging.basicConfig(
 
25
  # Check if running in real FoundationPose mode or placeholder mode
26
  USE_REAL_MODEL = os.environ.get("USE_REAL_MODEL", "false").lower() == "true"
27
 
28
+ logger.info(f"Starting in {'REAL' if USE_REAL_MODEL else 'PLACEHOLDER'} mode")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  class FoundationPoseInference:
 
166
  pose_estimator = FoundationPoseInference()
167
 
168
 
169
+ # Gradio wrapper functions
 
170
  def gradio_initialize(object_id: str, reference_files: List, fx: float, fy: float, cx: float, cy: float):
171
  """Gradio wrapper for object initialization."""
172
  try:
 
210
  return f"Error: {str(e)}"
211
 
212
 
 
213
  def gradio_estimate(object_id: str, query_image: np.ndarray, fx: float, fy: float, cx: float, cy: float):
214
  """Gradio wrapper for pose estimation."""
215
  try:
app_simple.py DELETED
@@ -1,290 +0,0 @@
1
- """
2
- Simple FoundationPose API server using FastAPI + Gradio
3
-
4
- This version uses FastAPI for clean REST API endpoints alongside Gradio UI.
5
- """
6
-
7
- import base64
8
- import json
9
- import logging
10
- import os
11
- from typing import Dict, List
12
-
13
- import cv2
14
- import gradio as gr
15
- import numpy as np
16
- import spaces
17
- import torch
18
- from fastapi import FastAPI, HTTPException
19
- from pydantic import BaseModel
20
-
21
- logging.basicConfig(
22
- level=logging.INFO,
23
- format="[%(asctime)s] %(levelname)s: %(message)s"
24
- )
25
- logger = logging.getLogger(__name__)
26
-
27
- # Check if running in real FoundationPose mode or placeholder mode
28
- USE_REAL_MODEL = os.environ.get("USE_REAL_MODEL", "false").lower() == "true"
29
-
30
-
31
- class FoundationPoseInference:
32
- """Wrapper for FoundationPose model inference."""
33
-
34
- def __init__(self):
35
- self.model = None
36
- self.device = None
37
- self.initialized = False
38
- self.tracked_objects = {}
39
- self.use_real_model = USE_REAL_MODEL
40
-
41
- @spaces.GPU(duration=120) # Allocate GPU for 120 seconds (includes model loading)
42
- def initialize_model(self):
43
- """Initialize the FoundationPose model on GPU."""
44
- if self.initialized:
45
- logger.info("Model already initialized")
46
- return
47
-
48
- logger.info("Initializing FoundationPose model...")
49
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
- logger.info(f"Using device: {self.device}")
51
-
52
- if self.use_real_model:
53
- try:
54
- logger.info("Loading real FoundationPose model...")
55
- from estimator import FoundationPoseEstimator
56
-
57
- self.model = FoundationPoseEstimator(
58
- device=str(self.device),
59
- weights_dir="weights"
60
- )
61
- logger.info("✓ Real FoundationPose model initialized successfully")
62
-
63
- except Exception as e:
64
- logger.error(f"Failed to initialize real model: {e}", exc_info=True)
65
- logger.warning("Falling back to placeholder mode")
66
- self.use_real_model = False
67
- self.model = None
68
- else:
69
- logger.info("Using placeholder mode (set USE_REAL_MODEL=true for real inference)")
70
- self.model = None
71
-
72
- self.initialized = True
73
- logger.info("FoundationPose inference ready")
74
-
75
- def register_object(
76
- self,
77
- object_id: str,
78
- reference_images: List[np.ndarray],
79
- camera_intrinsics: Dict = None,
80
- mesh_path: str = None
81
- ) -> bool:
82
- """Register an object for tracking with reference images."""
83
- if not self.initialized:
84
- self.initialize_model()
85
-
86
- logger.info(f"Registering object '{object_id}' with {len(reference_images)} reference images")
87
-
88
- if self.use_real_model and self.model is not None:
89
- try:
90
- success = self.model.register_object(
91
- object_id=object_id,
92
- reference_images=reference_images,
93
- camera_intrinsics=camera_intrinsics,
94
- mesh_path=mesh_path
95
- )
96
- if success:
97
- self.tracked_objects[object_id] = {
98
- "num_references": len(reference_images),
99
- "camera_intrinsics": camera_intrinsics,
100
- "mesh_path": mesh_path
101
- }
102
- return success
103
- except Exception as e:
104
- logger.error(f"Registration failed: {e}", exc_info=True)
105
- return False
106
- else:
107
- self.tracked_objects[object_id] = {
108
- "num_references": len(reference_images),
109
- "camera_intrinsics": camera_intrinsics,
110
- "mesh_path": mesh_path
111
- }
112
- logger.info(f"✓ Object '{object_id}' registered (placeholder mode)")
113
- return True
114
-
115
- @spaces.GPU(duration=10)
116
- def estimate_pose(
117
- self,
118
- object_id: str,
119
- query_image: np.ndarray,
120
- camera_intrinsics: Dict = None,
121
- depth_image: np.ndarray = None,
122
- mask: np.ndarray = None
123
- ) -> Dict:
124
- """Estimate 6D pose of an object in a query image."""
125
- if not self.initialized:
126
- return {"success": False, "error": "Model not initialized"}
127
-
128
- if object_id not in self.tracked_objects:
129
- return {"success": False, "error": f"Object '{object_id}' not registered"}
130
-
131
- logger.info(f"Estimating pose for object '{object_id}'")
132
-
133
- if self.use_real_model and self.model is not None:
134
- try:
135
- pose_result = self.model.estimate_pose(
136
- object_id=object_id,
137
- rgb_image=query_image,
138
- depth_image=depth_image,
139
- mask=mask,
140
- camera_intrinsics=camera_intrinsics
141
- )
142
-
143
- if pose_result is None:
144
- return {
145
- "success": False,
146
- "error": "Pose estimation returned None",
147
- "poses": []
148
- }
149
-
150
- return {
151
- "success": True,
152
- "poses": [pose_result]
153
- }
154
-
155
- except Exception as e:
156
- logger.error(f"Pose estimation error: {e}", exc_info=True)
157
- return {"success": False, "error": str(e), "poses": []}
158
- else:
159
- logger.info("Placeholder mode: returning empty pose result")
160
- return {
161
- "success": True,
162
- "poses": [],
163
- "note": "Placeholder mode - set USE_REAL_MODEL=true for real inference"
164
- }
165
-
166
-
167
- # Global model instance
168
- pose_estimator = FoundationPoseInference()
169
-
170
-
171
- # Pydantic models for API
172
- class InitializeRequest(BaseModel):
173
- object_id: str
174
- reference_images_b64: List[str]
175
- camera_intrinsics: str = None
176
- mesh_path: str = None
177
-
178
-
179
- class EstimateRequest(BaseModel):
180
- object_id: str
181
- query_image_b64: str
182
- camera_intrinsics: str = None
183
- depth_image_b64: str = None
184
- mask_b64: str = None
185
-
186
-
187
- # Create FastAPI app
188
- app = FastAPI()
189
-
190
-
191
- @app.post("/api/initialize")
192
- async def api_initialize(request: InitializeRequest):
193
- """Initialize object tracking with reference images."""
194
- try:
195
- # Decode reference images
196
- reference_images = []
197
- for img_b64 in request.reference_images_b64:
198
- img_bytes = base64.b64decode(img_b64)
199
- img_array = np.frombuffer(img_bytes, dtype=np.uint8)
200
- img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
201
- img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
202
- reference_images.append(img)
203
-
204
- # Parse camera intrinsics
205
- intrinsics = json.loads(request.camera_intrinsics) if request.camera_intrinsics else None
206
-
207
- # Register object
208
- success = pose_estimator.register_object(
209
- object_id=request.object_id,
210
- reference_images=reference_images,
211
- camera_intrinsics=intrinsics,
212
- mesh_path=request.mesh_path
213
- )
214
-
215
- return {
216
- "success": success,
217
- "message": f"Object '{request.object_id}' registered with {len(reference_images)} reference images"
218
- }
219
-
220
- except Exception as e:
221
- logger.error(f"Initialization error: {e}", exc_info=True)
222
- raise HTTPException(status_code=500, detail=str(e))
223
-
224
-
225
- @app.post("/api/estimate")
226
- async def api_estimate(request: EstimateRequest):
227
- """Estimate 6D pose from query image."""
228
- try:
229
- # Decode query image
230
- img_bytes = base64.b64decode(request.query_image_b64)
231
- img_array = np.frombuffer(img_bytes, dtype=np.uint8)
232
- img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
233
- img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
234
-
235
- # Decode optional depth image
236
- depth = None
237
- if request.depth_image_b64:
238
- depth_bytes = base64.b64decode(request.depth_image_b64)
239
- depth = np.frombuffer(depth_bytes, dtype=np.float32)
240
-
241
- # Decode optional mask
242
- mask = None
243
- if request.mask_b64:
244
- mask_bytes = base64.b64decode(request.mask_b64)
245
- mask_array = np.frombuffer(mask_bytes, dtype=np.uint8)
246
- mask = cv2.imdecode(mask_array, cv2.IMREAD_GRAYSCALE)
247
-
248
- # Parse camera intrinsics
249
- intrinsics = json.loads(request.camera_intrinsics) if request.camera_intrinsics else None
250
-
251
- # Estimate pose
252
- result = pose_estimator.estimate_pose(
253
- object_id=request.object_id,
254
- query_image=img,
255
- camera_intrinsics=intrinsics,
256
- depth_image=depth,
257
- mask=mask
258
- )
259
-
260
- return result
261
-
262
- except Exception as e:
263
- logger.error(f"Estimation error: {e}", exc_info=True)
264
- raise HTTPException(status_code=500, detail=str(e))
265
-
266
-
267
- # Gradio UI (simplified)
268
- with gr.Blocks(title="FoundationPose Inference", theme=gr.themes.Soft()) as gradio_app:
269
- gr.Markdown("# 🎯 FoundationPose 6D Object Pose Estimation")
270
-
271
- mode_indicator = gr.Markdown(
272
- f"**Mode:** {'🟢 Real FoundationPose' if USE_REAL_MODEL else '🟡 Placeholder'}",
273
- elem_id="mode"
274
- )
275
-
276
- gr.Markdown("""
277
- API Endpoints:
278
- - POST `/api/initialize` - Register object
279
- - POST `/api/estimate` - Estimate pose
280
-
281
- See documentation for usage examples.
282
- """)
283
-
284
-
285
- # Mount Gradio to FastAPI
286
- app = gr.mount_gradio_app(app, gradio_app, path="/")
287
-
288
- if __name__ == "__main__":
289
- import uvicorn
290
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deploy.sh DELETED
@@ -1,108 +0,0 @@
1
- #!/bin/bash
2
- # Deploy FoundationPose to Hugging Face Spaces
3
-
4
- set -e
5
-
6
- SPACE_URL="https://huggingface.co/spaces/gpue/foundationpose"
7
-
8
- echo "=========================================="
9
- echo "FoundationPose Hugging Face Deployment"
10
- echo "=========================================="
11
- echo ""
12
-
13
- # Check if we're in the right directory
14
- if [ ! -f "app.py" ]; then
15
- echo "Error: Must run from foundationpose directory"
16
- exit 1
17
- fi
18
-
19
- # Check git remote
20
- if ! git remote get-url origin | grep -q "huggingface"; then
21
- echo "Setting up Hugging Face remote..."
22
- git remote add origin https://huggingface.co/spaces/gpue/foundationpose
23
- else
24
- echo "✓ Hugging Face remote configured"
25
- fi
26
-
27
- # Check for uncommitted changes
28
- if [ -n "$(git status --porcelain)" ]; then
29
- echo ""
30
- echo "Uncommitted changes found. Commit them?"
31
- echo ""
32
- git status --short
33
- echo ""
34
- read -p "Commit all changes? (y/N) " -n 1 -r
35
- echo
36
- if [[ $REPLY =~ ^[Yy]$ ]]; then
37
- read -p "Commit message: " commit_msg
38
- git add .
39
- git commit -m "$commit_msg"
40
- else
41
- echo "Deployment cancelled."
42
- exit 0
43
- fi
44
- fi
45
-
46
- # Check for model weights
47
- echo ""
48
- echo "Checking for model weights..."
49
- if [ -d "weights/2023-10-28-18-33-37" ] && [ -d "weights/2024-01-11-20-02-45" ]; then
50
- echo "✓ Model weights found"
51
- echo ""
52
- echo "Deploy in REAL mode (with model weights)?"
53
- echo " - Pro: Actual pose estimation"
54
- echo " - Con: Large files, GPU costs"
55
- echo ""
56
- read -p "Enable real mode? (y/N) " -n 1 -r
57
- echo
58
- if [[ $REPLY =~ ^[Yy]$ ]]; then
59
- USE_REAL="true"
60
- echo ""
61
- echo "Note: Make sure git-lfs is set up for weights:"
62
- echo " git lfs track 'weights/**'"
63
- echo " git add .gitattributes"
64
- echo ""
65
- else
66
- USE_REAL="false"
67
- fi
68
- else
69
- echo "⚠ Model weights not found in weights/"
70
- echo "Deploying in PLACEHOLDER mode (empty results)"
71
- echo ""
72
- echo "To add weights:"
73
- echo " 1. Download from: https://drive.google.com/drive/folders/1GCyGE-LbFGgRC-FuGsF3a1zeBuzsQ1Da"
74
- echo " 2. Extract to weights/ directory"
75
- echo " 3. Re-run this script"
76
- echo ""
77
- USE_REAL="false"
78
- fi
79
-
80
- # Push to Hugging Face
81
- echo "Pushing to Hugging Face Spaces..."
82
- git push origin main
83
-
84
- echo ""
85
- echo "=========================================="
86
- echo "Deployment Complete!"
87
- echo "=========================================="
88
- echo ""
89
- echo "Your Space is available at:"
90
- echo " $SPACE_URL"
91
- echo ""
92
- echo "Mode: $([ "$USE_REAL" = "true" ] && echo "🟢 Real FoundationPose" || echo "🟡 Placeholder")"
93
- echo ""
94
-
95
- if [ "$USE_REAL" = "false" ]; then
96
- echo "To enable real mode:"
97
- echo " 1. Add model weights to weights/ directory"
98
- echo " 2. Set USE_REAL_MODEL=true in Space secrets"
99
- echo " 3. Push again"
100
- echo ""
101
- fi
102
-
103
- echo "Monitor build progress:"
104
- echo " https://huggingface.co/spaces/gpue/foundationpose/logs"
105
- echo ""
106
- echo "Test the Space:"
107
- echo " open $SPACE_URL"
108
- echo ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
download_weights.py DELETED
@@ -1,196 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Download FoundationPose pre-trained model weights from Hugging Face.
4
-
5
- Weights can be hosted in a HF model repository (recommended) or downloaded
6
- manually from the official Google Drive.
7
- """
8
-
9
- import os
10
- import sys
11
- from pathlib import Path
12
-
13
- try:
14
- from huggingface_hub import hf_hub_download, snapshot_download
15
- except ImportError:
16
- print("Installing huggingface_hub...")
17
- os.system(f"{sys.executable} -m pip install huggingface_hub")
18
- from huggingface_hub import hf_hub_download, snapshot_download
19
-
20
-
21
- # Configuration
22
- HF_MODEL_REPO = os.environ.get("FOUNDATIONPOSE_MODEL_REPO", "gpue/foundationpose-weights")
23
- USE_HF_WEIGHTS = os.environ.get("USE_HF_WEIGHTS", "true").lower() == "true"
24
-
25
-
26
- def download_from_huggingface(weights_dir: Path) -> bool:
27
- """Download weights from Hugging Face model repository.
28
-
29
- Args:
30
- weights_dir: Directory to save weights
31
-
32
- Returns:
33
- True if successful
34
- """
35
- print("=" * 60)
36
- print("Downloading from Hugging Face Model Repository")
37
- print("=" * 60)
38
- print(f"Repository: {HF_MODEL_REPO}")
39
- print(f"Target: {weights_dir.absolute()}")
40
- print()
41
-
42
- try:
43
- # Get HF token if available (for private repos)
44
- hf_token = os.environ.get("HF_TOKEN")
45
- if hf_token:
46
- print("🔒 Using HF_TOKEN for authentication (private repository)")
47
-
48
- print("Downloading model weights...")
49
- print("(This may take several minutes on first run)")
50
- print()
51
-
52
- # Download entire repository
53
- snapshot_download(
54
- repo_id=HF_MODEL_REPO,
55
- local_dir=str(weights_dir),
56
- local_dir_use_symlinks=False,
57
- resume_download=True,
58
- token=hf_token # Will use token if provided, None otherwise
59
- )
60
-
61
- print()
62
- print("✓ Download complete!")
63
- return True
64
-
65
- except Exception as e:
66
- error_msg = str(e).lower()
67
- print(f"✗ Download failed: {e}")
68
- print()
69
-
70
- # Check if it's an authentication error
71
- if "401" in error_msg or "403" in error_msg or "authentication" in error_msg or "token" in error_msg:
72
- print("🔒 Authentication Error - Repository is private!")
73
- print()
74
- print("Solutions:")
75
- print(" Option 1: Make repository public")
76
- print(f" Visit: https://huggingface.co/{HF_MODEL_REPO}/settings")
77
- print(" Change visibility to 'Public'")
78
- print()
79
- print(" Option 2: Add HF token to Space secrets")
80
- print(" 1. Get token: https://huggingface.co/settings/tokens")
81
- print(" 2. Add to Space secrets as 'HF_TOKEN'")
82
- print()
83
- else:
84
- print("Possible issues:")
85
- print(f" 1. Repository '{HF_MODEL_REPO}' doesn't exist")
86
- print(" 2. Repository is private (need HF_TOKEN in secrets)")
87
- print(" 3. Network error")
88
- print()
89
- print("To create the model repository:")
90
- print(" 1. Visit: https://huggingface.co/new")
91
- print(" 2. Create a model repo (e.g., 'gpue/foundationpose-weights')")
92
- print(" 3. Upload weights using:")
93
- print(" huggingface-cli upload gpue/foundationpose-weights ./weights/")
94
- print()
95
- return False
96
-
97
-
98
- def manual_download_instructions(weights_dir: Path):
99
- """Print instructions for manual weight download."""
100
- print("=" * 60)
101
- print("Manual Weight Download Instructions")
102
- print("=" * 60)
103
- print()
104
- print("Option 1: Download from official Google Drive")
105
- print("-" * 40)
106
- print("1. Visit: https://drive.google.com/drive/folders/1GCyGE-LbFGgRC-FuGsF3a1zeBuzsQ1Da")
107
- print("2. Download these folders:")
108
- print(" - 2023-10-28-18-33-37/ (refiner weights)")
109
- print(" - 2024-01-11-20-02-45/ (scorer weights)")
110
- print(f"3. Extract to: {weights_dir.absolute()}")
111
- print()
112
- print("Option 2: Create Hugging Face model repository")
113
- print("-" * 40)
114
- print("1. Download weights from Google Drive (see above)")
115
- print("2. Create HF model repo: https://huggingface.co/new")
116
- print("3. Upload weights:")
117
- print(" pip install huggingface_hub")
118
- print(" huggingface-cli login")
119
- print(f" huggingface-cli upload YOUR_USERNAME/foundationpose-weights {weights_dir}/")
120
- print("4. Set environment variable:")
121
- print(f" export FOUNDATIONPOSE_MODEL_REPO=YOUR_USERNAME/foundationpose-weights")
122
- print()
123
-
124
-
125
- def check_weights_exist(weights_dir: Path) -> bool:
126
- """Check if weights already exist locally.
127
-
128
- Args:
129
- weights_dir: Directory containing weights
130
-
131
- Returns:
132
- True if weights exist
133
- """
134
- required_folders = [
135
- weights_dir / "2023-10-28-18-33-37",
136
- weights_dir / "2024-01-11-20-02-45"
137
- ]
138
-
139
- return all(folder.exists() and any(folder.iterdir()) for folder in required_folders)
140
-
141
-
142
- def download_weights() -> bool:
143
- """Download or check for FoundationPose weights.
144
-
145
- Returns:
146
- True if weights are available
147
- """
148
- weights_dir = Path("weights")
149
- weights_dir.mkdir(exist_ok=True)
150
-
151
- # Check if weights already exist
152
- if check_weights_exist(weights_dir):
153
- print("✓ Model weights found locally!")
154
- print(f" Location: {weights_dir.absolute()}")
155
- return True
156
-
157
- print("Model weights not found locally.")
158
- print()
159
-
160
- # Try downloading from Hugging Face
161
- if USE_HF_WEIGHTS:
162
- print(f"Attempting to download from Hugging Face...")
163
- print(f"Repository: {HF_MODEL_REPO}")
164
- print()
165
-
166
- if download_from_huggingface(weights_dir):
167
- return True
168
-
169
- print()
170
- print("Hugging Face download failed. See manual instructions below.")
171
- print()
172
-
173
- # Show manual instructions
174
- manual_download_instructions(weights_dir)
175
-
176
- return False
177
-
178
-
179
- if __name__ == "__main__":
180
- print()
181
- success = download_weights()
182
- print()
183
-
184
- if success:
185
- print("=" * 60)
186
- print("✓ Ready to use FoundationPose!")
187
- print("=" * 60)
188
- sys.exit(0)
189
- else:
190
- print("=" * 60)
191
- print("⚠ Weights not available")
192
- print("=" * 60)
193
- print()
194
- print("Space will run in PLACEHOLDER mode.")
195
- print("To enable real inference, follow instructions above.")
196
- sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
estimator.py CHANGED
@@ -1,107 +1,53 @@
1
  """
2
- FoundationPose Estimator Wrapper
3
 
4
- This module wraps the FoundationPose API for easy integration with the Gradio app.
5
  """
6
 
7
  import logging
8
  import sys
9
  from pathlib import Path
10
- from typing import Dict, List, Optional, Tuple
11
 
12
- import cv2
13
  import numpy as np
14
  import torch
15
- import trimesh
16
 
17
  logger = logging.getLogger(__name__)
18
 
 
 
 
 
 
19
 
20
  class FoundationPoseEstimator:
21
- """Wrapper for FoundationPose 6D pose estimation."""
22
 
23
  def __init__(self, device: str = "cuda", weights_dir: str = "weights"):
24
- """Initialize FoundationPose.
25
 
26
  Args:
27
- device: Device to run inference on ("cuda" or "cpu")
28
- weights_dir: Path to model weights directory
29
  """
30
  self.device = device
31
  self.weights_dir = Path(weights_dir)
 
 
32
 
33
- # Add FoundationPose to Python path
34
- foundationpose_dir = Path("FoundationPose")
35
- if foundationpose_dir.exists():
36
- sys.path.insert(0, str(foundationpose_dir))
37
- else:
38
  raise RuntimeError(
39
- "FoundationPose repository not found. "
40
  "Clone it with: git clone https://github.com/NVlabs/FoundationPose.git"
41
  )
42
 
43
- # Import FoundationPose modules
44
- try:
45
- from estimater import FoundationPose
46
- from datareader import SceneReader
47
- import pytorch3d.transforms as transforms
48
-
49
- self.FoundationPose = FoundationPose
50
- self.SceneReader = SceneReader
51
- self.transforms = transforms
52
-
53
- except ImportError as e:
54
- raise RuntimeError(
55
- f"Failed to import FoundationPose modules: {e}\n"
56
- "Make sure FoundationPose is properly installed with all dependencies."
57
- )
58
-
59
- # Initialize models
60
- self._init_models()
61
-
62
- # Tracking state
63
- self.tracked_objects = {}
64
- self.pose_estimators = {}
65
-
66
- def _init_models(self):
67
- """Initialize scorer and refiner models."""
68
- logger.info("Initializing FoundationPose models...")
69
-
70
- try:
71
- # Load scorer model
72
- scorer_weights = self.weights_dir / "2024-01-11-20-02-45"
73
- if not scorer_weights.exists():
74
- raise FileNotFoundError(f"Scorer weights not found at {scorer_weights}")
75
-
76
- # Load refiner model
77
- refiner_weights = self.weights_dir / "2023-10-28-18-33-37"
78
- if not refiner_weights.exists():
79
- raise FileNotFoundError(f"Refiner weights not found at {refiner_weights}")
80
-
81
- # Import and initialize models (actual implementation depends on FoundationPose API)
82
- from model import FoundationPoseModel
83
-
84
- self.scorer = FoundationPoseModel(
85
- checkpoint_dir=str(scorer_weights),
86
- model_type="scorer"
87
- ).to(self.device)
88
- self.scorer.eval()
89
-
90
- self.refiner = FoundationPoseModel(
91
- checkpoint_dir=str(refiner_weights),
92
- model_type="refiner"
93
- ).to(self.device)
94
- self.refiner.eval()
95
 
96
- # Initialize CUDA rasterization context
97
- import nvdiffrast.torch as dr
98
- self.glctx = dr.RasterizeCudaContext()
99
-
100
- logger.info("✓ Models initialized successfully")
101
-
102
- except Exception as e:
103
- logger.error(f"Failed to initialize models: {e}")
104
- raise
105
 
106
  def register_object(
107
  self,
@@ -114,81 +60,29 @@ class FoundationPoseEstimator:
114
 
115
  Args:
116
  object_id: Unique identifier for the object
117
- reference_images: List of RGB images from different viewpoints
118
- camera_intrinsics: Camera parameters (fx, fy, cx, cy)
119
- mesh_path: Optional path to CAD mesh (for model-based mode)
120
 
121
  Returns:
122
  True if registration successful
123
  """
124
- logger.info(f"Registering object '{object_id}'...")
125
-
126
  try:
127
- # Load or reconstruct mesh
128
- if mesh_path and Path(mesh_path).exists():
129
- # Model-based: use CAD mesh
130
- mesh = trimesh.load(mesh_path)
131
- logger.info(f"Loaded mesh from {mesh_path}")
132
- else:
133
- # Model-free: reconstruct from reference images
134
- logger.info("Reconstructing mesh from reference images...")
135
- mesh = self._reconstruct_mesh_from_references(
136
- reference_images, camera_intrinsics
137
- )
138
-
139
- # Create FoundationPose estimator for this object
140
- estimator = self.FoundationPose(
141
- model_pts=mesh.vertices,
142
- model_normals=mesh.vertex_normals,
143
- mesh=mesh,
144
- scorer=self.scorer,
145
- refiner=self.refiner,
146
- debug_dir=None,
147
- debug=0,
148
- glctx=self.glctx
149
- )
150
-
151
- # Store object data
152
- self.tracked_objects[object_id] = {
153
- "mesh": mesh,
154
  "camera_intrinsics": camera_intrinsics,
155
- "registered": True
156
- }
157
- self.pose_estimators[object_id] = {
158
- "estimator": estimator,
159
- "tracking": False,
160
- "last_pose": None
161
  }
162
 
163
- logger.info(f"✓ Object '{object_id}' registered successfully")
164
  return True
165
 
166
  except Exception as e:
167
- logger.error(f"Failed to register object: {e}", exc_info=True)
168
  return False
169
 
170
- def _reconstruct_mesh_from_references(
171
- self,
172
- reference_images: List[np.ndarray],
173
- camera_intrinsics: Optional[Dict]
174
- ) -> trimesh.Trimesh:
175
- """Reconstruct 3D mesh from reference images using BundleSDF.
176
-
177
- Args:
178
- reference_images: List of RGB images
179
- camera_intrinsics: Camera parameters
180
-
181
- Returns:
182
- Reconstructed mesh
183
- """
184
- # TODO: Implement BundleSDF reconstruction
185
- # For now, return a simple placeholder mesh
186
- logger.warning("Mesh reconstruction not fully implemented, using placeholder")
187
-
188
- # Create a simple cube mesh as placeholder
189
- mesh = trimesh.creation.box(extents=[0.1, 0.1, 0.1])
190
- return mesh
191
-
192
  def estimate_pose(
193
  self,
194
  object_id: str,
@@ -197,217 +91,32 @@ class FoundationPoseEstimator:
197
  mask: Optional[np.ndarray] = None,
198
  camera_intrinsics: Optional[Dict] = None
199
  ) -> Optional[Dict]:
200
- """Estimate 6D pose of object in image.
201
 
202
  Args:
203
- object_id: ID of registered object
204
- rgb_image: RGB image (H, W, 3)
205
- depth_image: Optional depth map (H, W)
206
- mask: Optional object segmentation mask (H, W)
207
- camera_intrinsics: Camera parameters
208
 
209
  Returns:
210
- Pose dictionary with position, orientation, and confidence
211
  """
212
- if object_id not in self.pose_estimators:
213
  logger.error(f"Object '{object_id}' not registered")
214
  return None
215
 
216
  try:
217
- estimator_data = self.pose_estimators[object_id]
218
- estimator = estimator_data["estimator"]
219
-
220
- # Get camera intrinsics
221
- if camera_intrinsics is None:
222
- camera_intrinsics = self.tracked_objects[object_id]["camera_intrinsics"]
223
-
224
- K = self._build_intrinsics_matrix(camera_intrinsics, rgb_image.shape)
225
-
226
- # Generate synthetic depth if not provided
227
- if depth_image is None:
228
- depth_image = np.zeros((rgb_image.shape[0], rgb_image.shape[1]), dtype=np.float32)
229
 
230
- # Auto-segment if mask not provided
231
- if mask is None:
232
- mask = self._segment_object(rgb_image)
233
-
234
- # First frame: register
235
- if not estimator_data["tracking"]:
236
- logger.info(f"Initial registration for '{object_id}'")
237
- pose = estimator.register(
238
- K=K,
239
- rgb=rgb_image,
240
- depth=depth_image,
241
- ob_mask=mask,
242
- iteration=5 # Number of refinement iterations
243
- )
244
- estimator_data["tracking"] = True
245
- estimator_data["last_pose"] = pose
246
- else:
247
- # Subsequent frames: track
248
- pose = estimator.track_one(
249
- rgb=rgb_image,
250
- depth=depth_image,
251
- K=K,
252
- iteration=2
253
- )
254
- estimator_data["last_pose"] = pose
255
-
256
- # Convert pose matrix to position + quaternion
257
- result = self._pose_matrix_to_dict(pose, object_id)
258
-
259
- logger.info(f"Estimated pose for '{object_id}': confidence={result['confidence']:.3f}")
260
- return result
261
 
262
  except Exception as e:
263
  logger.error(f"Pose estimation failed: {e}", exc_info=True)
264
  return None
265
-
266
- def _build_intrinsics_matrix(
267
- self,
268
- intrinsics: Optional[Dict],
269
- image_shape: Tuple[int, int, int]
270
- ) -> np.ndarray:
271
- """Build camera intrinsics matrix.
272
-
273
- Args:
274
- intrinsics: Dict with fx, fy, cx, cy
275
- image_shape: (H, W, C)
276
-
277
- Returns:
278
- 3x3 intrinsics matrix
279
- """
280
- H, W = image_shape[:2]
281
-
282
- if intrinsics:
283
- fx = intrinsics.get("fx", 500.0)
284
- fy = intrinsics.get("fy", 500.0)
285
- cx = intrinsics.get("cx", W / 2)
286
- cy = intrinsics.get("cy", H / 2)
287
- else:
288
- # Default intrinsics
289
- fx = fy = 500.0
290
- cx = W / 2
291
- cy = H / 2
292
-
293
- K = np.array([
294
- [fx, 0, cx],
295
- [0, fy, cy],
296
- [0, 0, 1]
297
- ], dtype=np.float32)
298
-
299
- return K
300
-
301
- def _segment_object(self, rgb_image: np.ndarray) -> np.ndarray:
302
- """Segment object from background.
303
-
304
- This is a placeholder - in production, use SAM or similar.
305
-
306
- Args:
307
- rgb_image: RGB image
308
-
309
- Returns:
310
- Binary mask
311
- """
312
- # Simple color-based segmentation placeholder
313
- # In production, use Segment Anything Model (SAM)
314
- H, W = rgb_image.shape[:2]
315
- mask = np.ones((H, W), dtype=np.uint8) * 255
316
-
317
- logger.warning("Using placeholder segmentation - implement SAM for production")
318
- return mask
319
-
320
- def _pose_matrix_to_dict(self, pose_matrix: np.ndarray, object_id: str) -> Dict:
321
- """Convert 4x4 pose matrix to dictionary format.
322
-
323
- Args:
324
- pose_matrix: 4x4 transformation matrix
325
- object_id: Object identifier
326
-
327
- Returns:
328
- Dictionary with position, orientation (quaternion), confidence
329
- """
330
- # Extract translation
331
- position = {
332
- "x": float(pose_matrix[0, 3]),
333
- "y": float(pose_matrix[1, 3]),
334
- "z": float(pose_matrix[2, 3])
335
- }
336
-
337
- # Extract rotation matrix and convert to quaternion
338
- rotation_matrix = pose_matrix[:3, :3]
339
- quat = self._rotation_matrix_to_quaternion(rotation_matrix)
340
-
341
- orientation = {
342
- "w": float(quat[0]),
343
- "x": float(quat[1]),
344
- "y": float(quat[2]),
345
- "z": float(quat[3])
346
- }
347
-
348
- # Estimate confidence based on tracking state
349
- # In production, use actual confidence from the model
350
- confidence = 0.9 if self.pose_estimators[object_id]["tracking"] else 0.7
351
-
352
- # Get object dimensions from mesh
353
- mesh = self.tracked_objects[object_id]["mesh"]
354
- extents = mesh.bounds[1] - mesh.bounds[0]
355
- dimensions = [float(extents[0]), float(extents[1]), float(extents[2])]
356
-
357
- return {
358
- "object_id": object_id,
359
- "position": position,
360
- "orientation": orientation,
361
- "confidence": confidence,
362
- "dimensions": dimensions,
363
- "timestamp": 0.0 # Add timestamp if needed
364
- }
365
-
366
- def _rotation_matrix_to_quaternion(self, R: np.ndarray) -> np.ndarray:
367
- """Convert 3x3 rotation matrix to quaternion (w, x, y, z).
368
-
369
- Args:
370
- R: 3x3 rotation matrix
371
-
372
- Returns:
373
- Quaternion as numpy array [w, x, y, z]
374
- """
375
- trace = np.trace(R)
376
-
377
- if trace > 0:
378
- s = 0.5 / np.sqrt(trace + 1.0)
379
- w = 0.25 / s
380
- x = (R[2, 1] - R[1, 2]) * s
381
- y = (R[0, 2] - R[2, 0]) * s
382
- z = (R[1, 0] - R[0, 1]) * s
383
- elif R[0, 0] > R[1, 1] and R[0, 0] > R[2, 2]:
384
- s = 2.0 * np.sqrt(1.0 + R[0, 0] - R[1, 1] - R[2, 2])
385
- w = (R[2, 1] - R[1, 2]) / s
386
- x = 0.25 * s
387
- y = (R[0, 1] + R[1, 0]) / s
388
- z = (R[0, 2] + R[2, 0]) / s
389
- elif R[1, 1] > R[2, 2]:
390
- s = 2.0 * np.sqrt(1.0 + R[1, 1] - R[0, 0] - R[2, 2])
391
- w = (R[0, 2] - R[2, 0]) / s
392
- x = (R[0, 1] + R[1, 0]) / s
393
- y = 0.25 * s
394
- z = (R[1, 2] + R[2, 1]) / s
395
- else:
396
- s = 2.0 * np.sqrt(1.0 + R[2, 2] - R[0, 0] - R[1, 1])
397
- w = (R[1, 0] - R[0, 1]) / s
398
- x = (R[0, 2] + R[2, 0]) / s
399
- y = (R[1, 2] + R[2, 1]) / s
400
- z = 0.25 * s
401
-
402
- return np.array([w, x, y, z])
403
-
404
- def reset_tracking(self, object_id: str):
405
- """Reset tracking state for an object.
406
-
407
- Args:
408
- object_id: Object to reset
409
- """
410
- if object_id in self.pose_estimators:
411
- self.pose_estimators[object_id]["tracking"] = False
412
- self.pose_estimators[object_id]["last_pose"] = None
413
- logger.info(f"Reset tracking for '{object_id}'")
 
1
  """
2
+ FoundationPose model wrapper for inference.
3
 
4
+ This module wraps the FoundationPose library for 6D object pose estimation.
5
  """
6
 
7
  import logging
8
  import sys
9
  from pathlib import Path
10
+ from typing import Dict, List, Optional
11
 
 
12
  import numpy as np
13
  import torch
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
+ # Add FoundationPose to Python path
18
+ FOUNDATIONPOSE_ROOT = Path("/app/FoundationPose")
19
+ if FOUNDATIONPOSE_ROOT.exists():
20
+ sys.path.insert(0, str(FOUNDATIONPOSE_ROOT))
21
+
22
 
23
  class FoundationPoseEstimator:
24
+ """Wrapper for FoundationPose model."""
25
 
26
  def __init__(self, device: str = "cuda", weights_dir: str = "weights"):
27
+ """Initialize FoundationPose estimator.
28
 
29
  Args:
30
+ device: Device to run inference on ('cuda' or 'cpu')
31
+ weights_dir: Directory containing model weights
32
  """
33
  self.device = device
34
  self.weights_dir = Path(weights_dir)
35
+ self.model = None
36
+ self.registered_objects = {}
37
 
38
+ # Check if FoundationPose is available
39
+ if not FOUNDATIONPOSE_ROOT.exists():
 
 
 
40
  raise RuntimeError(
41
+ f"FoundationPose repository not found at {FOUNDATIONPOSE_ROOT}. "
42
  "Clone it with: git clone https://github.com/NVlabs/FoundationPose.git"
43
  )
44
 
45
+ # Check if weights exist
46
+ if not self.weights_dir.exists() or not any(self.weights_dir.glob("**/*.pth")):
47
+ logger.warning(f"No model weights found in {self.weights_dir}")
48
+ logger.warning("Model will not work without weights")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ logger.info(f"FoundationPose estimator initialized (device: {device})")
 
 
 
 
 
 
 
 
51
 
52
  def register_object(
53
  self,
 
60
 
61
  Args:
62
  object_id: Unique identifier for the object
63
+ reference_images: List of RGB reference images (H, W, 3)
64
+ camera_intrinsics: Camera parameters {fx, fy, cx, cy}
65
+ mesh_path: Optional path to object mesh file
66
 
67
  Returns:
68
  True if registration successful
69
  """
 
 
70
  try:
71
+ # Store object registration
72
+ self.registered_objects[object_id] = {
73
+ "num_references": len(reference_images),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  "camera_intrinsics": camera_intrinsics,
75
+ "mesh_path": mesh_path,
76
+ "reference_images": reference_images # Keep for now
 
 
 
 
77
  }
78
 
79
+ logger.info(f"✓ Registered object '{object_id}' with {len(reference_images)} reference images")
80
  return True
81
 
82
  except Exception as e:
83
+ logger.error(f"Failed to register object '{object_id}': {e}", exc_info=True)
84
  return False
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  def estimate_pose(
87
  self,
88
  object_id: str,
 
91
  mask: Optional[np.ndarray] = None,
92
  camera_intrinsics: Optional[Dict] = None
93
  ) -> Optional[Dict]:
94
+ """Estimate 6D pose of registered object in image.
95
 
96
  Args:
97
+ object_id: ID of object to detect
98
+ rgb_image: RGB query image (H, W, 3)
99
+ depth_image: Optional depth image (H, W)
100
+ mask: Optional object mask (H, W)
101
+ camera_intrinsics: Camera parameters {fx, fy, cx, cy}
102
 
103
  Returns:
104
+ Pose dictionary with position, orientation, confidence or None
105
  """
106
+ if object_id not in self.registered_objects:
107
  logger.error(f"Object '{object_id}' not registered")
108
  return None
109
 
110
  try:
111
+ # TODO: Implement actual FoundationPose inference
112
+ # This is a placeholder that would need to:
113
+ # 1. Load the FoundationPose model if not loaded
114
+ # 2. Run pose estimation on the query image
115
+ # 3. Return the estimated pose
 
 
 
 
 
 
 
116
 
117
+ logger.warning("FoundationPose inference not yet implemented - returning None")
118
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  except Exception as e:
121
  logger.error(f"Pose estimation failed: {e}", exc_info=True)
122
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,5 @@
1
  # Core dependencies
2
  gradio>=4.0.0
3
- spaces
4
  numpy>=1.24.0
5
  opencv-python>=4.8.0
6
  Pillow>=10.0.0
 
1
  # Core dependencies
2
  gradio>=4.0.0
 
3
  numpy>=1.24.0
4
  opencv-python>=4.8.0
5
  Pillow>=10.0.0
test_local.py DELETED
@@ -1,264 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test FoundationPose Space locally before deploying to Hugging Face.
4
-
5
- This script tests both placeholder and real modes (if weights available).
6
- """
7
-
8
- import os
9
- import sys
10
- import time
11
- from pathlib import Path
12
-
13
- import cv2
14
- import numpy as np
15
-
16
- # Set to test placeholder mode
17
- os.environ["USE_REAL_MODEL"] = "false"
18
-
19
- print("=" * 60)
20
- print("FoundationPose Local Test")
21
- print("=" * 60)
22
- print()
23
-
24
- # Import after setting environment variable
25
- try:
26
- from app import pose_estimator
27
- print("✓ Successfully imported app.py")
28
- except Exception as e:
29
- print(f"✗ Failed to import app.py: {e}")
30
- sys.exit(1)
31
-
32
- print(f"Mode: {'Real' if pose_estimator.use_real_model else 'Placeholder'}")
33
- print()
34
-
35
-
36
- def test_placeholder_mode():
37
- """Test the Space in placeholder mode."""
38
- print("Test 1: Placeholder Mode")
39
- print("-" * 40)
40
-
41
- # Create dummy reference images
42
- ref_images = []
43
- for i in range(5):
44
- img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
45
- ref_images.append(img)
46
-
47
- # Test registration
48
- print("Registering object with 5 reference images...")
49
- start = time.time()
50
- success = pose_estimator.register_object(
51
- object_id="test_object",
52
- reference_images=ref_images,
53
- camera_intrinsics={"fx": 500, "fy": 500, "cx": 320, "cy": 240}
54
- )
55
- elapsed = time.time() - start
56
-
57
- if success:
58
- print(f"✓ Registration successful ({elapsed:.2f}s)")
59
- else:
60
- print(f"✗ Registration failed")
61
- return False
62
-
63
- # Test pose estimation
64
- print("Estimating pose from query image...")
65
- query_img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
66
-
67
- start = time.time()
68
- result = pose_estimator.estimate_pose(
69
- object_id="test_object",
70
- query_image=query_img,
71
- camera_intrinsics={"fx": 500, "fy": 500, "cx": 320, "cy": 240}
72
- )
73
- elapsed = time.time() - start
74
-
75
- if result["success"]:
76
- num_poses = len(result["poses"])
77
- print(f"✓ Pose estimation successful ({elapsed:.2f}s)")
78
- print(f" Detected poses: {num_poses}")
79
- if num_poses == 0 and "note" in result:
80
- print(f" Note: {result['note']}")
81
- return True
82
- else:
83
- print(f"✗ Pose estimation failed: {result.get('error', 'Unknown')}")
84
- return False
85
-
86
-
87
- def test_with_reference_images():
88
- """Test with actual reference images if available."""
89
- print()
90
- print("Test 2: Real Reference Images")
91
- print("-" * 40)
92
-
93
- # Check for reference images
94
- ref_dir = Path("../training/perception/reference/target_cube")
95
- if not ref_dir.exists():
96
- print("⊘ Reference images not found, skipping")
97
- print(f" Expected at: {ref_dir}")
98
- return True
99
-
100
- # Load reference images
101
- ref_images = []
102
- for img_path in sorted(ref_dir.glob("*.jpg")):
103
- img = cv2.imread(str(img_path))
104
- if img is not None:
105
- img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
106
- ref_images.append(img)
107
-
108
- if not ref_images:
109
- print("⊘ No .jpg files found in reference directory")
110
- return True
111
-
112
- print(f"Found {len(ref_images)} reference images")
113
-
114
- # Test registration
115
- print("Registering target_cube...")
116
- start = time.time()
117
- success = pose_estimator.register_object(
118
- object_id="target_cube",
119
- reference_images=ref_images
120
- )
121
- elapsed = time.time() - start
122
-
123
- if success:
124
- print(f"✓ Registration successful ({elapsed:.2f}s)")
125
- else:
126
- print(f"✗ Registration failed")
127
- return False
128
-
129
- # Test pose estimation with first reference image as query
130
- print("Estimating pose (using first reference image as query)...")
131
- start = time.time()
132
- result = pose_estimator.estimate_pose(
133
- object_id="target_cube",
134
- query_image=ref_images[0]
135
- )
136
- elapsed = time.time() - start
137
-
138
- if result["success"]:
139
- num_poses = len(result["poses"])
140
- print(f"✓ Pose estimation successful ({elapsed:.2f}s)")
141
- print(f" Detected poses: {num_poses}")
142
-
143
- if num_poses > 0:
144
- pose = result["poses"][0]
145
- print(f" Position: ({pose['position']['x']:.3f}, {pose['position']['y']:.3f}, {pose['position']['z']:.3f})")
146
- print(f" Confidence: {pose['confidence']:.3f}")
147
- else:
148
- print(f" Note: {result.get('note', 'No poses detected')}")
149
-
150
- return True
151
- else:
152
- print(f"✗ Pose estimation failed: {result.get('error', 'Unknown')}")
153
- return False
154
-
155
-
156
- def test_api_format():
157
- """Test that API format matches expected structure."""
158
- print()
159
- print("Test 3: API Format Validation")
160
- print("-" * 40)
161
-
162
- # Create test object
163
- ref_img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
164
- pose_estimator.register_object("api_test", [ref_img])
165
-
166
- # Get result
167
- result = pose_estimator.estimate_pose("api_test", ref_img)
168
-
169
- # Check format
170
- required_keys = ["success", "poses"]
171
- optional_keys = ["error", "note"]
172
-
173
- print("Checking response format...")
174
-
175
- for key in required_keys:
176
- if key in result:
177
- print(f" ✓ Has '{key}' field")
178
- else:
179
- print(f" ✗ Missing '{key}' field")
180
- return False
181
-
182
- if result["success"]:
183
- if len(result["poses"]) > 0:
184
- pose = result["poses"][0]
185
- pose_required = ["object_id", "position", "orientation", "confidence", "dimensions"]
186
-
187
- for key in pose_required:
188
- if key in pose:
189
- print(f" ✓ Pose has '{key}' field")
190
- else:
191
- print(f" ✗ Pose missing '{key}' field")
192
- return False
193
-
194
- # Check nested structure
195
- if isinstance(pose["position"], dict) and "x" in pose["position"]:
196
- print(f" ✓ Position format correct")
197
- else:
198
- print(f" ✗ Position format incorrect")
199
- return False
200
-
201
- if isinstance(pose["orientation"], dict) and "w" in pose["orientation"]:
202
- print(f" ✓ Orientation format correct")
203
- else:
204
- print(f" ✗ Orientation format incorrect")
205
- return False
206
- else:
207
- print(f" ℹ No poses detected (OK for placeholder mode)")
208
-
209
- print("✓ API format valid")
210
- return True
211
-
212
-
213
- def main():
214
- """Run all tests."""
215
- print("Starting tests...")
216
- print()
217
-
218
- tests = [
219
- ("Placeholder Mode", test_placeholder_mode),
220
- ("Reference Images", test_with_reference_images),
221
- ("API Format", test_api_format),
222
- ]
223
-
224
- results = []
225
- for name, test_func in tests:
226
- try:
227
- success = test_func()
228
- results.append((name, success))
229
- except Exception as e:
230
- print(f"✗ Exception in {name}: {e}")
231
- results.append((name, False))
232
-
233
- # Summary
234
- print()
235
- print("=" * 60)
236
- print("Test Summary")
237
- print("=" * 60)
238
-
239
- passed = sum(1 for _, success in results if success)
240
- total = len(results)
241
-
242
- for name, success in results:
243
- status = "✓ PASS" if success else "✗ FAIL"
244
- print(f"{status}: {name}")
245
-
246
- print()
247
- print(f"Results: {passed}/{total} tests passed")
248
-
249
- if passed == total:
250
- print()
251
- print("🎉 All tests passed! Ready to deploy.")
252
- print()
253
- print("Next steps:")
254
- print(" 1. Run './deploy.sh' to deploy to Hugging Face")
255
- print(" 2. Or start locally: python app.py")
256
- return 0
257
- else:
258
- print()
259
- print("⚠ Some tests failed. Fix issues before deploying.")
260
- return 1
261
-
262
-
263
- if __name__ == "__main__":
264
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
verify_weights.py DELETED
@@ -1,138 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Verify that FoundationPose weights are accessible from Hugging Face model repository.
4
-
5
- This script checks:
6
- 1. Model repo exists and is accessible
7
- 2. Required weight files are present
8
- 3. Files can be downloaded successfully
9
- """
10
-
11
- import os
12
- import sys
13
- from pathlib import Path
14
-
15
- try:
16
- from huggingface_hub import list_repo_files, hf_hub_download
17
- except ImportError:
18
- print("❌ huggingface_hub not installed")
19
- print("Install with: pip install huggingface_hub")
20
- sys.exit(1)
21
-
22
- # Configuration
23
- MODEL_REPO = os.environ.get("FOUNDATIONPOSE_MODEL_REPO", "gpue/foundationpose-weights")
24
-
25
- # Required files
26
- REQUIRED_FILES = [
27
- "2023-10-28-18-33-37/config.yml",
28
- "2023-10-28-18-33-37/model_best.pth",
29
- "2024-01-11-20-02-45/config.yml",
30
- "2024-01-11-20-02-45/model_best.pth",
31
- ]
32
-
33
-
34
- def verify_repo_access():
35
- """Verify model repository is accessible."""
36
- print(f"Checking repository: {MODEL_REPO}")
37
- print("-" * 60)
38
-
39
- try:
40
- files = list_repo_files(repo_id=MODEL_REPO, repo_type="model")
41
- print(f"✓ Repository accessible")
42
- print(f"✓ Found {len(files)} files")
43
- return files
44
- except Exception as e:
45
- print(f"❌ Cannot access repository: {e}")
46
- return None
47
-
48
-
49
- def verify_required_files(repo_files):
50
- """Verify all required weight files are present."""
51
- print("\nChecking required files:")
52
- print("-" * 60)
53
-
54
- all_present = True
55
- for required_file in REQUIRED_FILES:
56
- if required_file in repo_files:
57
- print(f"✓ {required_file}")
58
- else:
59
- print(f"❌ Missing: {required_file}")
60
- all_present = False
61
-
62
- return all_present
63
-
64
-
65
- def test_download():
66
- """Test downloading a small file."""
67
- print("\nTesting download:")
68
- print("-" * 60)
69
-
70
- try:
71
- # Download a small config file to test connectivity
72
- test_file = "2023-10-28-18-33-37/config.yml"
73
- print(f"Downloading {test_file}...")
74
-
75
- downloaded = hf_hub_download(
76
- repo_id=MODEL_REPO,
77
- filename=test_file,
78
- repo_type="model"
79
- )
80
-
81
- print(f"✓ Download successful: {downloaded}")
82
-
83
- # Check file size
84
- size = Path(downloaded).stat().st_size
85
- print(f"✓ File size: {size:,} bytes")
86
-
87
- return True
88
-
89
- except Exception as e:
90
- print(f"❌ Download failed: {e}")
91
- return False
92
-
93
-
94
- def main():
95
- """Run all verification checks."""
96
- print("=" * 60)
97
- print("FoundationPose Model Repository Verification")
98
- print("=" * 60)
99
- print()
100
-
101
- # Check 1: Repository access
102
- repo_files = verify_repo_access()
103
- if repo_files is None:
104
- print("\n❌ Verification failed: Cannot access repository")
105
- sys.exit(1)
106
-
107
- # Check 2: Required files
108
- has_all_files = verify_required_files(repo_files)
109
- if not has_all_files:
110
- print("\n❌ Verification failed: Missing required files")
111
- sys.exit(1)
112
-
113
- # Check 3: Download test
114
- can_download = test_download()
115
- if not can_download:
116
- print("\n❌ Verification failed: Cannot download files")
117
- sys.exit(1)
118
-
119
- # All checks passed
120
- print()
121
- print("=" * 60)
122
- print("✓ All verification checks passed!")
123
- print("=" * 60)
124
- print()
125
- print(f"Model repository '{MODEL_REPO}' is ready to use.")
126
- print()
127
- print("To use in your Space:")
128
- print(" 1. Set environment variable:")
129
- print(f" FOUNDATIONPOSE_MODEL_REPO={MODEL_REPO}")
130
- print(" 2. Set USE_HF_WEIGHTS=true")
131
- print(" 3. Set USE_REAL_MODEL=true")
132
- print()
133
-
134
- return 0
135
-
136
-
137
- if __name__ == "__main__":
138
- sys.exit(main())