Spaces:
Build error
Build error
add: changes
Browse files- .gitattributes +1 -0
- app.py +32 -1
.gitattributes
CHANGED
|
@@ -26,3 +26,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
monoscene_kitti.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
monoscene_kitti.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
monoscene_nyu.ckpt filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -22,10 +22,41 @@ model = MonoScene.load_from_checkpoint(
|
|
| 22 |
full_scene_size=(60, 36, 60),
|
| 23 |
)
|
| 24 |
|
| 25 |
-
img_W, img_H = 640, 480
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def predict(img):
|
|
|
|
| 29 |
img = np.array(img, dtype=np.float32, copy=False) / 255.0
|
| 30 |
|
| 31 |
normalize_rgb = transforms.Compose(
|
|
|
|
| 22 |
full_scene_size=(60, 36, 60),
|
| 23 |
)
|
| 24 |
|
|
|
|
| 25 |
|
| 26 |
+
def get_projections(img_W, img_H):
    """Precompute the 3D-voxel -> 2D-pixel projection data for each 3D scale.

    For every downsampling scale in (1, 2) this projects the voxel grid into
    the image plane via ``vox2pix`` and collects the results in a dict, keyed
    the way the MonoScene model expects ("projected_pix_{s}", "pix_z_{s}",
    "fov_mask_{s}").

    Parameters
    ----------
    img_W, img_H : int
        Image width and height in pixels.

    Returns
    -------
    dict
        Projection tensors/arrays per scale, as produced by ``vox2pix``
        (exact types depend on that helper — defined elsewhere in the project).
    """
    # Scene/camera constants are identical for every scale, so compute them
    # once instead of re-building them (and re-inverting cam_pose) per loop
    # iteration as the original code did.
    scene_size = (4.8, 4.8, 2.88)  # scene extent; presumably metres — TODO confirm
    vox_origin = np.array([-1.54591799, 0.8907361, -0.05])
    voxel_size = 0.08  # voxel edge length at scale 1

    # Intrinsics (fx=fy=518.8579, cx=320, cy=240) match a 640x480 sensor;
    # NOTE(review): looks like NYU-style camera parameters — confirm.
    cam_k = np.array([[518.8579, 0, 320], [0, 518.8579, 240], [0, 0, 1]])
    cam_pose = np.asarray([[ 9.6699458e-01,  4.2662762e-02,  2.5120059e-01,  0.0000000e+00],
                           [-2.5147417e-01,  1.0867463e-03,  9.6786356e-01,  0.0000000e+00],
                           [ 4.1018680e-02, -9.9908894e-01,  1.1779292e-02,  1.1794727e+00],
                           [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  1.0000000e+00]])
    # Invert once: transform from the world/velo frame into the camera frame.
    T_velo_2_cam = np.linalg.inv(cam_pose)

    data = {}
    for scale_3d in (1, 2):
        # compute the 3D-2D mapping at this voxel resolution
        projected_pix, fov_mask, pix_z = vox2pix(
            T_velo_2_cam,
            cam_k,
            vox_origin,
            voxel_size * scale_3d,
            img_W,
            img_H,
            scene_size,
        )
        data["projected_pix_{}".format(scale_3d)] = projected_pix
        data["pix_z_{}".format(scale_3d)] = pix_z
        data["fov_mask_{}".format(scale_3d)] = fov_mask
    return data
|
| 57 |
|
| 58 |
def predict(img):
|
| 59 |
+
img_W, img_H = 640, 480
|
| 60 |
img = np.array(img, dtype=np.float32, copy=False) / 255.0
|
| 61 |
|
| 62 |
normalize_rgb = transforms.Compose(
|