|
|
import struct |
|
|
import numpy as np |
|
|
import png |
|
|
import re |
|
|
import sys |
|
|
import csv |
|
|
from PIL import Image |
|
|
import h5py |
|
|
|
|
|
|
|
|
# Magic number at the start of every .flo file: the ASCII bytes "PIEH"
# interpreted as a little-endian float32 give exactly this value.
FLO_TAG_FLOAT = 202021.25
# The same magic number in the ASCII form that is written to disk.
FLO_TAG_STRING = "PIEH"
# Flow components whose magnitude exceeds this threshold are treated as
# "unknown" when a .flo file is read.
FLO_UNKNOWN_FLOW_THRESH = 1e9
# Value stored in a .flo file in place of NaN (unknown) flow components.
FLO_UNKNOWN_FLOW = 1e10
|
|
|
|
|
|
|
|
def readFlowFile(filepath):
    """load an optical flow field, picking the reader from the file extension.

    Recognised extensions: .flo (Sintel), .png (KITTI), .npy (numpy),
    .pfm (FlyingThings3D) and .flo5 (Spring).
    The .flo and .png readers mark positions without groundtruth as np.nan;
    the other readers return the values as stored in the file.

    filepath: path to the flow file
    returns: flow with shape height x width x 2
    raises: ValueError if the extension is none of the above
    """
    # Note: ".flo5" does not end with ".flo", so the order of the checks is irrelevant.
    if filepath.endswith(".flo"):
        return readFloFlow(filepath)
    if filepath.endswith(".png"):
        return readPngFlow(filepath)
    if filepath.endswith(".npy"):
        return readNpyFlow(filepath)
    if filepath.endswith(".pfm"):
        return readPfmFlow(filepath)
    if filepath.endswith(".flo5"):
        return readFlo5Flow(filepath)
    raise ValueError(f"readFlowFile: Unknown file format for {filepath}")
|
|
|
|
|
|
|
|
def writeFlowFile(flow, filepath):
    """store an optical flow field, picking the writer from the file extension.

    Recognised extensions: .flo (Sintel), .png (KITTI), .npy (numpy) and .flo5.

    flow: optical flow with shape height x width x 2. Invalid values should be represented as np.nan
    filepath: file path where to write the flow
    raises: ValueError for an empty path or an unknown extension,
            IOError if flow does not have shape height x width x 2
    """
    if not filepath:
        raise ValueError("writeFlowFile: empty filepath")

    has_flow_layout = len(flow.shape) == 3 and flow.shape[2] == 2
    if not has_flow_layout:
        raise IOError(
            f"writeFlowFile {filepath}: expected shape height x width x 2 but received {flow.shape}"
        )

    # A taller-than-wide array often means the axes were swapped; only warn, do not fail.
    rows, cols = flow.shape[0], flow.shape[1]
    if rows > cols:
        print(
            f"write flo file {filepath}: Warning: Are you writing an upright image? Expected shape height x width x 2, got {flow.shape}"
        )

    if filepath.endswith(".flo"):
        return writeFloFlow(flow, filepath)
    if filepath.endswith(".png"):
        return writePngFlow(flow, filepath)
    if filepath.endswith(".npy"):
        return writeNpyFile(flow, filepath)
    if filepath.endswith(".flo5"):
        return writeFlo5File(flow, filepath)
    raise ValueError(f"writeFlowFile: Unknown file format for {filepath}")
|
|
|
|
|
|
|
|
def readFloFlow(filepath):
    """read optical flow from file stored in .flo file format as used in the Sintel dataset (Butler et al., 2012)

    filepath: path to file where to read from
    returns: flow as a float64 numpy array with shape height x width x 2;
             components with magnitude above FLO_UNKNOWN_FLOW_THRESH are set to np.nan
    raises: IOError for a missing filename, wrong extension, wrong tag,
            illegal dimensions, or a file that is too short / too long

    ---

    ".flo" file format used for optical flow evaluation

    Stores 2-band float image for horizontal (u) and vertical (v) flow components.
    Floats are stored in little-endian order.
    A flow value is considered "unknown" if either |u| or |v| is greater than 1e9.

    bytes  contents

    0-3     tag: "PIEH" in ASCII, which in little endian happens to be the float 202021.25
            (just a sanity check that floats are represented correctly)
    4-7     width as an integer
    8-11    height as an integer
    12-end  data (width*height*2*4 bytes total)
            the float values for u and v, interleaved, in row order, i.e.,
            u[row0,col0], v[row0,col0], u[row0,col1], v[row0,col1], ...
    """
    if filepath is None:
        raise IOError("read flo file: empty filename")

    if not filepath.endswith(".flo"):
        raise IOError(f"read flo file ({filepath}): extension .flo expected")

    with open(filepath, "rb") as stream:
        header = stream.read(12)
        # read() returns a short bytes object (never None) on a truncated file,
        # so the length has to be checked explicitly.
        if len(header) != 12:
            raise IOError(f"read flo file({filepath}): file is too short")

        # "<" forces little-endian decoding as mandated by the format,
        # independent of the byte order of the host machine.
        tag, width, height = struct.unpack("<fii", header)

        if tag != FLO_TAG_FLOAT:
            raise IOError(
                f"read flo file({filepath}): wrong tag (possibly due to big-endian machine?)"
            )

        if width < 1 or width > 99999:
            raise IOError(f"read flo file({filepath}): illegal width {width}")

        if height < 1 or height > 99999:
            raise IOError(f"read flo file({filepath}): illegal height {height}")

        nBands = 2
        expected_bytes = height * width * nBands * 4
        payload = stream.read(expected_bytes)
        if len(payload) != expected_bytes:
            raise IOError(f"read flo file({filepath}): file is too short")

        if stream.read(1) != b"":
            raise IOError(f"read flo file({filepath}): file is too long")

    # frombuffer yields a read-only float32 view; astype makes the writable
    # float64 copy that callers of this function have always received.
    flow = np.frombuffer(payload, dtype="<f4").astype(np.float64)
    flow = flow.reshape((height, width, nBands))

    flow[np.abs(flow) > FLO_UNKNOWN_FLOW_THRESH] = np.nan

    return flow
|
|
|
|
|
|
|
|
def writeFloFlow(flow, filepath):
    """
    write optical flow in .flo format to file as used in the Sintel dataset (Butler et al., 2012)

    flow: optical flow with shape height x width x 2; np.nan entries are stored as FLO_UNKNOWN_FLOW
    filepath: optical flow file path to be saved
    raises: IOError if fewer bytes than expected were written

    ---

    ".flo" file format used for optical flow evaluation

    Stores 2-band float image for horizontal (u) and vertical (v) flow components.
    Floats are stored in little-endian order.
    A flow value is considered "unknown" if either |u| or |v| is greater than 1e9.

    bytes  contents

    0-3     tag: "PIEH" in ASCII, which in little endian happens to be the float 202021.25
            (just a sanity check that floats are represented correctly)
    4-7     width as an integer
    8-11    height as an integer
    12-end  data (width*height*2*4 bytes total)
            the float values for u and v, interleaved, in row order, i.e.,
            u[row0,col0], v[row0,col0], u[row0,col1], v[row0,col1], ...
    """
    height, width, nBands = flow.shape

    # One conversion to little-endian float32. np.array always copies, so the
    # caller's array is left untouched when NaNs are replaced below.
    data = np.array(flow, dtype="<f4", order="C")
    data[np.isnan(data)] = FLO_UNKNOWN_FLOW

    # "<" pins the header integers to little-endian regardless of the host.
    header = FLO_TAG_STRING.encode("ascii") + struct.pack("<ii", width, height)
    payload = data.tobytes()

    with open(filepath, "wb") as f:
        if f.write(header) != len(header):
            raise IOError(f"write flo file {filepath}: problem writing header")

        if f.write(payload) != len(payload):
            raise IOError(f"write flo file {filepath}: problem writing flow data")
|
|
|
|
|
|
|
|
def readPngFlow(filepath):
    """read optical flow from a png file as used in the KITTI 12 (Geiger et al., 2012) and KITTI 15 (Menze et al., 2015) dataset.

    Each pixel carries three values: the two flow components, stored as
    64 * value + 2**15, and a third channel that is 0 where the flow is invalid.

    filepath: path to file where to read from
    returns: flow as a numpy array with shape height x width x 2. Invalid values are represented as np.nan
    """
    reader = png.Reader(filename=filepath)
    direct = reader.asDirect()
    rows = list(direct[2])
    width, height = direct[3]["size"]

    decoded = np.zeros((height, width, 3), dtype=np.float64)
    for row_idx, row in enumerate(rows):
        # every row is a flat sequence of interleaved channel values
        for channel in range(3):
            decoded[row_idx, :, channel] = row[channel::3]

    # Remember the invalid pixels before the flow channels are rescaled.
    invalid_mask = decoded[:, :, 2] == 0
    decoded[:, :, 0:2] = (decoded[:, :, 0:2] - 2**15) / 64.0
    decoded[invalid_mask, 0] = np.nan
    decoded[invalid_mask, 1] = np.nan
    return decoded[:, :, :2]
|
|
|
|
|
|
|
|
def writePngFlow(flow, filename):
    """write optical flow to a 16 bit png file as used in the KITTI 12 (Geiger et al., 2012) and KITTI 15 (Menze et al., 2015) dataset.

    The flow components are stored as 64 * value + 2**15, clipped to the
    uint16 range; a third channel holds 1 for valid and 0 for invalid pixels.

    flow: optical flow in shape height x width x 2, invalid values should be represented as np.nan
    filename: path to file where to write to
    """
    encoded = 64.0 * flow + 2**15
    height, width = encoded.shape[0], encoded.shape[1]

    # A pixel is invalid as soon as one of its two components is NaN.
    has_nan = np.isnan(encoded[:, :, 0]) | np.isnan(encoded[:, :, 1])
    valid_map = np.ones([height, width, 1])
    valid_map[has_nan] = 0

    encoded = np.nan_to_num(encoded)
    encoded = np.concatenate([encoded, valid_map], axis=-1)
    encoded = np.clip(encoded, 0, 2**16 - 1).astype(np.uint16)

    # one flat row of interleaved channel values per image row
    png_rows = np.reshape(encoded, (-1, width * 3))
    with open(filename, "wb") as f:
        png_writer = png.Writer(
            width=width, height=height, bitdepth=16, greyscale=False
        )
        png_writer.write(f, png_rows)
|
|
|
|
|
|
|
|
def readNpyFlow(filepath):
    """load a numpy array from file via np.load.

    filepath: file to read from
    returns: the stored numpy array
    """
    loaded = np.load(filepath)
    return loaded
|
|
|
|
|
|
|
|
def writeNpyFile(arr, filepath):
    """store a numpy array to file via np.save.

    arr: numpy array to write
    filepath: file to write to
    """
    np.save(file=filepath, arr=arr)
|
|
|
|
|
|
|
|
def writeFlo5File(flow, filename):
    """write optical flow to an HDF5 (.flo5) file.

    The array is stored gzip-compressed (level 5) in a dataset named "flow".

    flow: optical flow array to store
    filename: path to file where to write to
    """
    with h5py.File(filename, "w") as h5_file:
        h5_file.create_dataset(
            "flow",
            data=flow,
            compression="gzip",
            compression_opts=5,
        )
|
|
|
|
|
|
|
|
def readFlo5Flow(filename):
    """read optical flow from an HDF5 (.flo5) file.

    filename: path to file where to read from
    returns: the full content of the "flow" dataset
    raises: IOError if the file contains no "flow" key
    """
    with h5py.File(filename, "r") as h5_file:
        if "flow" in h5_file.keys():
            # [()] reads the whole dataset into memory before the file is closed
            return h5_file["flow"][()]
        raise IOError(
            f"File {filename} does not have a 'flow' key. Is this a valid flo5 file?"
        )
|
|
|
|
|
|
|
|
def readPfmFlow(filepath):
    """read optical flow from a pfm file as used in the FlyingThings3D (Mayer et al., 2016) dataset.

    The file has to hold a three channel image; only the first two channels
    are returned as flow.

    filepath: path to file where to read from
    returns: flow as a numpy array with shape height x width x 2.
    raises: IOError if the file content is not a three channel image
    """
    pfm = readPfmFile(filepath)
    # Short-circuit keeps shape[2] from being evaluated on a 2D array.
    if len(pfm.shape) != 3 or pfm.shape[2] != 3:
        raise IOError(
            f"read pfm flow: PFM file has wrong shape (assumed to be w x h x 3): {pfm.shape}"
        )

    return pfm[:, :, :2]
|
|
|
|
|
|
|
|
def readPfmFile(filepath):
    """
    read an image stored in pfm file format.

    adapted from https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html

    filepath: path to file where to read from
    returns: numpy float32 array with shape height x width x 3 for a "PF"
             (color) file or height x width for a "Pf" file; rows are flipped
             vertically relative to their order in the file
    raises: Exception if the header is not a valid PFM header
    """
    # The context manager makes sure the handle is closed on every path,
    # including the header errors raised below.
    with open(filepath, "rb") as stream:
        header = stream.readline().rstrip().decode("ascii")
        if header == "PF":
            color = True
        elif header == "Pf":
            color = False
        else:
            raise Exception("Not a PFM file.")

        dim_match = re.match(r"^(\d+)\s(\d+)\s$", stream.readline().decode("ascii"))
        if not dim_match:
            raise Exception("Malformed PFM header.")
        width, height = map(int, dim_match.groups())

        # Only the sign of the scale line is used: negative means the
        # samples are little-endian, otherwise big-endian.
        scale = float(stream.readline().decode("ascii").rstrip())
        endian = "<" if scale < 0 else ">"

        data = np.fromfile(stream, endian + "f")

    shape = (height, width, 3) if color else (height, width)

    data = np.reshape(data, shape)
    return np.flipud(data)
|
|
|
|
|
|
|
|
def writePfmFile(image, filepath):
    """
    write an image to file in pfm file format.

    adapted from https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html

    image: float32 numpy array with shape H x W x 3 (written as "PF"),
           H x W x 1 or H x W (written as "Pf")
    filepath: path to file where to write to
    raises: Exception if the dtype is not float32 or the shape is unsupported
    """
    # Validate before touching the file system so that a rejected image does
    # not truncate an existing file.
    if image.dtype.name != "float32":
        raise Exception("Image dtype must be float32.")

    if len(image.shape) == 3 and image.shape[2] == 3:
        color = True
    elif len(image.shape) == 2 or (len(image.shape) == 3 and image.shape[2] == 1):
        color = False
    else:
        raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # Rows are stored in vertically flipped order.
    image = np.flipud(image)

    # A negative scale in the header marks little-endian sample data.
    scale = 1
    endian = image.dtype.byteorder
    if endian == "<" or (endian == "=" and sys.byteorder == "little"):
        scale = -scale

    with open(filepath, "wb") as stream:
        # Both alternatives must be bytes: the file is opened in binary mode.
        stream.write(b"PF\n" if color else b"Pf\n")
        stream.write(b"%d %d\n" % (image.shape[1], image.shape[0]))
        stream.write(b"%f\n" % scale)
        image.tofile(stream)
|
|
|
|
|
|
|
|
def readDispFile(filepath):
    """load disparity (or disparity change), picking the reader from the file extension.

    Recognised extensions: .png (KITTI), .npy (numpy), .pfm (FlyingThings3D)
    and .dsp5.
    The .png reader marks positions without groundtruth as np.nan; the other
    readers return the values as stored in the file.

    filepath: path to the disparity file
    returns: disparity with shape height x width
    raises: ValueError if the extension is none of the above
    """
    if filepath.endswith(".png"):
        return readPngDisp(filepath)
    if filepath.endswith(".npy"):
        # a .npy file is a plain array dump, so the flow reader is reused
        return readNpyFlow(filepath)
    if filepath.endswith(".pfm"):
        return readPfmDisp(filepath)
    if filepath.endswith(".dsp5"):
        return readDsp5Disp(filepath)
    raise ValueError(f"readDispFile: Unknown file format for {filepath}")
|
|
|
|
|
|
|
|
def readPngDisp(filepath):
    """read disparity from a png file as used in the KITTI 12 (Geiger et al., 2012) and KITTI 15 (Menze et al., 2015) dataset.

    The file has to be single channel; stored values are divided by 256 and
    a stored value of 0 marks an invalid pixel.

    filepath: path to file where to read from
    returns: disparity as a numpy array with shape height x width. Invalid values are represented as np.nan
    raises: IOError if the image has more than one channel
    """
    reader = png.Reader(filename=filepath)
    direct = reader.asDirect()
    rows = list(direct[2])
    width, height = direct[3]["size"]

    # The length of a flat row divided by the width gives the channel count.
    channels = len(rows[0]) // width
    if channels != 1:
        raise IOError("read png disp: assumed channels to be 1!")

    disp = np.zeros((height, width), dtype=np.float64)
    for row_idx, row in enumerate(rows):
        disp[row_idx, :] = row[:]

    disp[disp == 0] = np.nan
    return disp[:, :] / 256.0
|
|
|
|
|
|
|
|
def readPfmDisp(filepath):
    """read disparity or disparity change from a pfm file as used in the FlyingThings3D (Mayer et al., 2016) dataset.

    filepath: path to file where to read from
    returns: disparity as a numpy array with shape height x width, exactly as returned by readPfmFile
    raises: IOError if the file content is not a two dimensional array
    """
    pfm = readPfmFile(filepath)
    if len(pfm.shape) == 2:
        return pfm
    raise IOError(
        f"read pfm disp: PFM file has wrong shape (assumed to be w x h): {pfm.shape}"
    )
|
|
|
|
|
|
|
|
def writePngDisp(disp, filepath):
    """write disparity to a 16 bit greyscale png file as used in the KITTI 12 (Geiger et al., 2012) and KITTI 15 (Menze et al., 2015) dataset.

    Values are stored as 256 * disparity, clipped to the uint16 range;
    np.nan entries are stored as 0.

    disp: disparity in shape height x width, invalid values should be represented as np.nan
    filepath: path to file where to write to
    """
    scaled = 256 * disp
    height, width = scaled.shape[0], scaled.shape[1]

    # clip leaves NaN untouched; nan_to_num then maps it to 0
    scaled = np.clip(scaled, 0, 2**16 - 1)
    scaled = np.nan_to_num(scaled).astype(np.uint16)
    png_rows = np.reshape(scaled, (-1, width))

    with open(filepath, "wb") as f:
        png_writer = png.Writer(
            width=width, height=height, bitdepth=16, greyscale=True
        )
        png_writer.write(f, png_rows)
|
|
|
|
|
|
|
|
def writeDsp5File(disp, filename):
    """write disparity to an HDF5 (.dsp5) file.

    The array is stored gzip-compressed (level 5) in a dataset named "disparity".

    disp: disparity array to store
    filename: path to file where to write to
    """
    with h5py.File(filename, "w") as h5_file:
        h5_file.create_dataset(
            "disparity",
            data=disp,
            compression="gzip",
            compression_opts=5,
        )
|
|
|
|
|
|
|
|
def readDsp5Disp(filename):
    """read disparity from an HDF5 (.dsp5) file.

    filename: path to file where to read from
    returns: the full content of the "disparity" dataset
    raises: IOError if the file contains no "disparity" key
    """
    with h5py.File(filename, "r") as h5_file:
        if "disparity" in h5_file.keys():
            # [()] reads the whole dataset into memory before the file is closed
            return h5_file["disparity"][()]
        raise IOError(
            f"File {filename} does not have a 'disparity' key. Is this a valid dsp5 file?"
        )
|
|
|
|
|
|
|
|
def writeDispFile(disp, filepath):
    """write disparity to file. Supports png (KITTI), npy (numpy) and dsp5 file format.

    disp: disparity with shape height x width. Invalid values should be represented as np.nan
    filepath: file path where to write the disparity
    raises: ValueError for an empty path or an unknown extension,
            IOError if disp is not two dimensional
    """
    if not filepath:
        raise ValueError("writeDispFile: empty filepath")

    if len(disp.shape) != 2:
        raise IOError(
            f"writeDispFile {filepath}: expected shape height x width but received {disp.shape}"
        )

    # A taller-than-wide array often means the axes were swapped; only warn, do not fail.
    if disp.shape[0] > disp.shape[1]:
        print(
            f"writeDispFile {filepath}: Warning: Are you writing an upright image? Expected shape height x width, got {disp.shape}"
        )

    if filepath.endswith(".png"):
        writePngDisp(disp, filepath)
    elif filepath.endswith(".npy"):
        writeNpyFile(disp, filepath)
    elif filepath.endswith(".dsp5"):
        writeDsp5File(disp, filepath)
    else:
        # Without this branch an unsupported extension silently wrote nothing;
        # fail loudly, as writeFlowFile does.
        raise ValueError(f"writeDispFile: Unknown file format for {filepath}")
|
|
|
|
|
|
|
|
def readKITTIObjMap(filepath):
    """read a KITTI object map from a png file as a boolean mask.

    filepath: path to the png file (asserted to end with ".png")
    returns: boolean numpy array that is True where the stored value is greater than 0
    """
    assert filepath.endswith(".png")
    pixels = np.asarray(Image.open(filepath))
    return pixels > 0
|
|
|
|
|
|
|
|
def readKITTIIntrinsics(filepath, image=2):
    """read the intrinsics of one camera from a space separated calibration text file.

    The file is searched for the line starting with "K_<image as two digits>:",
    whose nine values are read as a row-major 3 x 3 matrix K.

    filepath: path to the calibration file (asserted to end with ".txt")
    image: camera index selecting the "K_xx:" line
    returns: float32 numpy array [K[0, 0], K[1, 1], K[0, 2], K[1, 2]],
             or None if the file has no matching line
    """
    assert filepath.endswith(".txt")

    key = f"K_{image:02d}:"
    # newline="" is the file mode recommended by the csv module
    with open(filepath, newline="") as f:
        for row in csv.reader(f, delimiter=" "):
            # Blank lines produce empty rows; indexing them would raise IndexError.
            if not row or row[0] != key:
                continue

            # Repeated or trailing spaces produce empty fields; drop them
            # before converting to float.
            values = [field for field in row[1:] if field]
            K = np.array(values, dtype=np.float32).reshape(3, 3)
            return np.array([K[0, 0], K[1, 1], K[0, 2], K[1, 2]])

    return None
|
|
|
|
|
|
|
|
def writePngMapFile(map_, filename):
    """save an array as an image file using PIL.

    map_: array that is converted with Image.fromarray
    filename: path to file where to write to
    """
    map_image = Image.fromarray(map_)
    map_image.save(filename)
|
|
|