Georg committed on
Commit
19d8da0
·
1 Parent(s): 68cd801

Prepare job build context

Browse files
Files changed (2) hide show
  1. app.py +20 -11
  2. estimator.py +3 -0
app.py CHANGED
@@ -66,13 +66,22 @@ def generate_slimsam_mask(rgb_image: np.ndarray, box_prompt: List[int]) -> tuple
66
 
67
  model, processor, device = _get_slimsam()
68
  raw_image = Image.fromarray(rgb_image).convert("RGB")
69
- inputs = processor(raw_image, input_boxes=[[box_prompt]], return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
70
  outputs = model(**inputs)
71
 
72
  masks = processor.image_processor.post_process_masks(
73
  outputs.pred_masks.cpu(),
74
- inputs["original_sizes"].cpu(),
75
- inputs["reshaped_input_sizes"].cpu(),
76
  )[0]
77
  scores = outputs.iou_scores.squeeze().cpu()
78
  best_idx = int(scores.argmax().item())
@@ -533,11 +542,11 @@ with gr.Blocks(title="FoundationPose Inference", theme=gr.themes.Soft()) as demo
533
 
534
  gr.Markdown("### Camera Intrinsics")
535
  with gr.Row():
536
- cad_fx = gr.Number(label="fx", value=500.0)
537
- cad_fy = gr.Number(label="fy", value=500.0)
538
  with gr.Row():
539
- cad_cx = gr.Number(label="cx", value=320.0)
540
- cad_cy = gr.Number(label="cy", value=240.0)
541
 
542
  cad_init_button = gr.Button("Initialize with CAD", variant="primary")
543
 
@@ -591,11 +600,11 @@ with gr.Blocks(title="FoundationPose Inference", theme=gr.themes.Soft()) as demo
591
 
592
  gr.Markdown("### Camera Intrinsics")
593
  with gr.Row():
594
- est_fx = gr.Number(label="fx (focal length x)", value=500.0)
595
- est_fy = gr.Number(label="fy (focal length y)", value=500.0)
596
  with gr.Row():
597
- est_cx = gr.Number(label="cx (principal point x)", value=320.0)
598
- est_cy = gr.Number(label="cy (principal point y)", value=240.0)
599
 
600
  est_button = gr.Button("Estimate Pose", variant="primary")
601
 
 
66
 
67
  model, processor, device = _get_slimsam()
68
  raw_image = Image.fromarray(rgb_image).convert("RGB")
69
+ enc = processor(raw_image, input_boxes=[[box_prompt]], return_tensors="np")
70
+ # Keep size tensors on CPU for post-processing
71
+ original_sizes = torch.as_tensor(enc["original_sizes"])
72
+ reshaped_sizes = torch.as_tensor(enc["reshaped_input_sizes"])
73
+ # Move model inputs to device
74
+ inputs = {
75
+ k: torch.as_tensor(v).to(device)
76
+ for k, v in enc.items()
77
+ if k not in {"original_sizes", "reshaped_input_sizes"}
78
+ }
79
  outputs = model(**inputs)
80
 
81
  masks = processor.image_processor.post_process_masks(
82
  outputs.pred_masks.cpu(),
83
+ original_sizes,
84
+ reshaped_sizes,
85
  )[0]
86
  scores = outputs.iou_scores.squeeze().cpu()
87
  best_idx = int(scores.argmax().item())
 
542
 
543
  gr.Markdown("### Camera Intrinsics")
544
  with gr.Row():
545
+ cad_fx = gr.Number(label="fx", value=193.13708498984758)
546
+ cad_fy = gr.Number(label="fy", value=193.13708498984758)
547
  with gr.Row():
548
+ cad_cx = gr.Number(label="cx", value=120.0)
549
+ cad_cy = gr.Number(label="cy", value=80.0)
550
 
551
  cad_init_button = gr.Button("Initialize with CAD", variant="primary")
552
 
 
600
 
601
  gr.Markdown("### Camera Intrinsics")
602
  with gr.Row():
603
+ est_fx = gr.Number(label="fx (focal length x)", value=193.13708498984758)
604
+ est_fy = gr.Number(label="fy (focal length y)", value=193.13708498984758)
605
  with gr.Row():
606
+ est_cx = gr.Number(label="cx (principal point x)", value=120.0)
607
+ est_cy = gr.Number(label="cy (principal point y)", value=80.0)
608
 
609
  est_button = gr.Button("Estimate Pose", variant="primary")
610
 
estimator.py CHANGED
@@ -300,6 +300,9 @@ class FoundationPoseEstimator:
300
  Returns:
301
  Dictionary with position, orientation (quaternion), and confidence
302
  """
 
 
 
303
  # Extract translation
304
  translation = pose_matrix[:3, 3]
305
 
 
300
  Returns:
301
  Dictionary with position, orientation (quaternion), and confidence
302
  """
303
+ if torch.is_tensor(pose_matrix):
304
+ pose_matrix = pose_matrix.detach().cpu().numpy()
305
+
306
  # Extract translation
307
  translation = pose_matrix[:3, 3]
308