import numpy as np
import torch
import cv2
def add_elapsed_steps_overlay(obs, display_value):
    """Draw a "Steps: <value>" label onto the base-camera RGB observation images.

    Args:
        obs: observation dictionary; expected to contain
            ``obs["sensor_data"]["base_camera"]["rgb"]`` as a torch.Tensor or
            array-like of shape (H, W, C) or (B, H, W, C).
            -- assumes channels-last RGB layout; TODO confirm against the env.
        display_value: value to display (rendered via ``int(display_value)``).

    Returns:
        The same ``obs`` dict, with the RGB images replaced by annotated
        copies. Container type (tensor vs. array), shape, dtype, and (for
        tensors) device are preserved.
    """
    if "sensor_data" not in obs or "base_camera" not in obs["sensor_data"]:
        return obs

    images = obs["sensor_data"]["base_camera"]["rgb"]
    is_tensor = isinstance(images, torch.Tensor)
    images_np = images.cpu().numpy() if is_tensor else np.asarray(images)

    value = int(display_value)

    original_shape = images_np.shape
    if len(original_shape) == 3:  # single image (H, W, C) -> add batch dim
        images_np = images_np[np.newaxis, ...]

    text = f"Steps: {value}"
    color = (255, 255, 255)  # White
    bg_color = (0, 0, 0)  # Black background
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    thickness = 2
    # Text metrics are identical for every image in the batch; compute once.
    (text_width, text_height), baseline = cv2.getTextSize(
        text, font, font_scale, thickness
    )

    processed_images = []
    for img in images_np:
        # BUG FIX: remember the ORIGINAL dtype up front. The previous code
        # re-checked img.dtype after the uint8 conversion had reassigned
        # `img`, so float inputs were never converted back to [0, 1] and
        # the observation dtype silently changed to uint8.
        was_float = img.dtype == np.float32 or img.dtype == np.float64
        if was_float:
            img = (img * 255).astype(np.uint8)

        # copy() both avoids mutating the input batch and yields the
        # C-contiguous, writable buffer that cv2 drawing requires.
        img_with_text = img.copy()

        # Position at bottom-left corner with padding.
        x = 10
        y = img_with_text.shape[0] - 15

        # Black background rectangle behind the text, then the text itself.
        cv2.rectangle(
            img_with_text,
            (x - 5, y - text_height - 5),
            (x + text_width + 5, y + baseline + 5),
            bg_color,
            -1,
        )
        cv2.putText(img_with_text, text, (x, y), font, font_scale, color, thickness)

        if was_float:
            # Restore the float [0, 1] representation of the input.
            img_with_text = img_with_text.astype(np.float32) / 255.0
        processed_images.append(img_with_text)

    processed_images = np.array(processed_images)
    if len(original_shape) == 3:  # was a single image -> drop batch dim
        processed_images = processed_images[0]

    if is_tensor:
        # BUG FIX: only tensors have .device — the previous code called
        # images.device unconditionally and crashed for plain arrays.
        processed_images = torch.from_numpy(processed_images).to(images.device)
    obs["sensor_data"]["base_camera"]["rgb"] = processed_images
    return obs