Spaces:
Running on T4
Running on T4
Georg committed on
Commit ·
19d8da0
1
Parent(s): 68cd801
Prepare job build context
Browse files
- app.py +20 -11
- estimator.py +3 -0
app.py
CHANGED
|
@@ -66,13 +66,22 @@ def generate_slimsam_mask(rgb_image: np.ndarray, box_prompt: List[int]) -> tuple
|
|
| 66 |
|
| 67 |
model, processor, device = _get_slimsam()
|
| 68 |
raw_image = Image.fromarray(rgb_image).convert("RGB")
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
outputs = model(**inputs)
|
| 71 |
|
| 72 |
masks = processor.image_processor.post_process_masks(
|
| 73 |
outputs.pred_masks.cpu(),
|
| 74 |
-
|
| 75 |
-
|
| 76 |
)[0]
|
| 77 |
scores = outputs.iou_scores.squeeze().cpu()
|
| 78 |
best_idx = int(scores.argmax().item())
|
|
@@ -533,11 +542,11 @@ with gr.Blocks(title="FoundationPose Inference", theme=gr.themes.Soft()) as demo
|
|
| 533 |
|
| 534 |
gr.Markdown("### Camera Intrinsics")
|
| 535 |
with gr.Row():
|
| 536 |
-
cad_fx = gr.Number(label="fx", value=
|
| 537 |
-
cad_fy = gr.Number(label="fy", value=
|
| 538 |
with gr.Row():
|
| 539 |
-
cad_cx = gr.Number(label="cx", value=
|
| 540 |
-
cad_cy = gr.Number(label="cy", value=
|
| 541 |
|
| 542 |
cad_init_button = gr.Button("Initialize with CAD", variant="primary")
|
| 543 |
|
|
@@ -591,11 +600,11 @@ with gr.Blocks(title="FoundationPose Inference", theme=gr.themes.Soft()) as demo
|
|
| 591 |
|
| 592 |
gr.Markdown("### Camera Intrinsics")
|
| 593 |
with gr.Row():
|
| 594 |
-
est_fx = gr.Number(label="fx (focal length x)", value=
|
| 595 |
-
est_fy = gr.Number(label="fy (focal length y)", value=
|
| 596 |
with gr.Row():
|
| 597 |
-
est_cx = gr.Number(label="cx (principal point x)", value=
|
| 598 |
-
est_cy = gr.Number(label="cy (principal point y)", value=
|
| 599 |
|
| 600 |
est_button = gr.Button("Estimate Pose", variant="primary")
|
| 601 |
|
|
|
|
| 66 |
|
| 67 |
model, processor, device = _get_slimsam()
|
| 68 |
raw_image = Image.fromarray(rgb_image).convert("RGB")
|
| 69 |
+
enc = processor(raw_image, input_boxes=[[box_prompt]], return_tensors="np")
|
| 70 |
+
# Keep size tensors on CPU for post-processing
|
| 71 |
+
original_sizes = torch.as_tensor(enc["original_sizes"])
|
| 72 |
+
reshaped_sizes = torch.as_tensor(enc["reshaped_input_sizes"])
|
| 73 |
+
# Move model inputs to device
|
| 74 |
+
inputs = {
|
| 75 |
+
k: torch.as_tensor(v).to(device)
|
| 76 |
+
for k, v in enc.items()
|
| 77 |
+
if k not in {"original_sizes", "reshaped_input_sizes"}
|
| 78 |
+
}
|
| 79 |
outputs = model(**inputs)
|
| 80 |
|
| 81 |
masks = processor.image_processor.post_process_masks(
|
| 82 |
outputs.pred_masks.cpu(),
|
| 83 |
+
original_sizes,
|
| 84 |
+
reshaped_sizes,
|
| 85 |
)[0]
|
| 86 |
scores = outputs.iou_scores.squeeze().cpu()
|
| 87 |
best_idx = int(scores.argmax().item())
|
|
|
|
| 542 |
|
| 543 |
gr.Markdown("### Camera Intrinsics")
|
| 544 |
with gr.Row():
|
| 545 |
+
cad_fx = gr.Number(label="fx", value=193.13708498984758)
|
| 546 |
+
cad_fy = gr.Number(label="fy", value=193.13708498984758)
|
| 547 |
with gr.Row():
|
| 548 |
+
cad_cx = gr.Number(label="cx", value=120.0)
|
| 549 |
+
cad_cy = gr.Number(label="cy", value=80.0)
|
| 550 |
|
| 551 |
cad_init_button = gr.Button("Initialize with CAD", variant="primary")
|
| 552 |
|
|
|
|
| 600 |
|
| 601 |
gr.Markdown("### Camera Intrinsics")
|
| 602 |
with gr.Row():
|
| 603 |
+
est_fx = gr.Number(label="fx (focal length x)", value=193.13708498984758)
|
| 604 |
+
est_fy = gr.Number(label="fy (focal length y)", value=193.13708498984758)
|
| 605 |
with gr.Row():
|
| 606 |
+
est_cx = gr.Number(label="cx (principal point x)", value=120.0)
|
| 607 |
+
est_cy = gr.Number(label="cy (principal point y)", value=80.0)
|
| 608 |
|
| 609 |
est_button = gr.Button("Estimate Pose", variant="primary")
|
| 610 |
|
estimator.py
CHANGED
|
@@ -300,6 +300,9 @@ class FoundationPoseEstimator:
|
|
| 300 |
Returns:
|
| 301 |
Dictionary with position, orientation (quaternion), and confidence
|
| 302 |
"""
|
|
|
|
|
|
|
|
|
|
| 303 |
# Extract translation
|
| 304 |
translation = pose_matrix[:3, 3]
|
| 305 |
|
|
|
|
| 300 |
Returns:
|
| 301 |
Dictionary with position, orientation (quaternion), and confidence
|
| 302 |
"""
|
| 303 |
+
if torch.is_tensor(pose_matrix):
|
| 304 |
+
pose_matrix = pose_matrix.detach().cpu().numpy()
|
| 305 |
+
|
| 306 |
# Extract translation
|
| 307 |
translation = pose_matrix[:3, 3]
|
| 308 |
|