| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import argparse |
| | import os |
| | from pathlib import Path, PurePosixPath |
| |
|
| | import numpy as np |
| | import json |
| | import sys |
| | import math |
| | import cv2 |
| | import os |
| | import shutil |
| |
|
def parse_args(argv=None):
    """Parse the command-line options of the COLMAP-to-NeRF converter.

    Args:
        argv: optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description="convert a text colmap export to nerf format transforms.json; optionally convert video to images, and optionally run colmap in the first place")

    parser.add_argument("--video_in", default="", help="run ffmpeg first to convert a provided video file into a set of images. uses the video_fps parameter also")
    # Numeric options are converted (and validated) by argparse so that a typo
    # is reported at parse time instead of failing later in the pipeline.
    parser.add_argument("--video_fps", default=2, type=float)
    parser.add_argument("--time_slice", default="", help="time (in seconds) in the format t1,t2 within which the images should be generated from the video. eg: \"--time_slice '10,300'\" will generate images only from 10th second to 300th second of the video")
    parser.add_argument("--run_colmap", action="store_true", help="run colmap first on the image folder")
    parser.add_argument("--colmap_matcher", default="sequential", choices=["exhaustive", "sequential", "spatial", "transitive", "vocab_tree"], help="select which matcher colmap should use. sequential for videos, exhaustive for adhoc images")
    parser.add_argument("--colmap_db", default="colmap.db", help="colmap database filename")
    parser.add_argument("--colmap_camera_model", default="OPENCV", choices=["SIMPLE_PINHOLE", "PINHOLE", "SIMPLE_RADIAL", "RADIAL", "OPENCV"], help="camera model")
    parser.add_argument("--colmap_camera_params", default="", help="intrinsic parameters, depending on the chosen model. Format: fx,fy,cx,cy,dist")
    parser.add_argument("--images", default="images", help="input path to the images")
    parser.add_argument("--text", default="colmap_text", help="input path to the colmap text files (set automatically if run_colmap is used)")
    # The default and the choices now share one type (int); previously the
    # default was an int while every accepted user value was a string, and the
    # help text claimed a maximum of 16 although choices go up to 128.
    parser.add_argument("--aabb_scale", default=16, type=int, choices=[1, 2, 4, 8, 16, 32, 64, 128], help="large scene scale factor. 1=scene fits in unit cube; power of 2 up to 128")
    parser.add_argument("--skip_early", default=0, type=int, help="skip this many images from the start")
    parser.add_argument("--keep_colmap_coords", action="store_true", help="keep transforms.json in COLMAP's original frame of reference (this will avoid reorienting and repositioning the scene for preview and rendering)")
    parser.add_argument("--out", default="transforms.json", help="output path")
    parser.add_argument("--vocab_path", default="", help="vocabulary tree path")
    return parser.parse_args(argv)
| |
|
def do_system(arg):
    """Run a shell command and terminate the script if it fails.

    Args:
        arg: the command line, passed verbatim to ``os.system``.

    Raises:
        SystemExit: with a non-zero exit code when the command fails.
    """
    print(f"==== running: {arg}")
    status = os.system(arg)
    if status:
        print("FATAL: command failed")
        # On POSIX os.system returns a wait status (exit code << 8). Handing
        # that to sys.exit unchanged makes e.g. 256 wrap around to exit code 0,
        # so the failure would be reported as success. Decode it first.
        exit_code = status >> 8 if os.name == "posix" else status
        # Signal deaths (low byte only) and out-of-range values map to 1.
        sys.exit(exit_code if 0 < exit_code < 256 else 1)
| |
|
def run_ffmpeg(args):
    """Extract frames from ``args.video_in`` into the image folder using ffmpeg.

    A relative ``args.images`` is resolved next to the video file, and
    ``args.images`` is updated in place with the resolved path. The folder is
    deleted and recreated after an interactive confirmation.

    Args:
        args: parsed options; reads ``images``, ``video_in``, ``video_fps``
            and ``time_slice``.

    Raises:
        SystemExit: when the user declines the prompt or ffmpeg fails.
    """
    if not os.path.isabs(args.images):
        args.images = os.path.join(os.path.dirname(args.video_in), args.images)
    images = "\"" + args.images + "\""
    video = "\"" + args.video_in + "\""
    # A frame rate of 0 falls back to 1 frame per second.
    fps = float(args.video_fps) or 1.0
    print(f"running ffmpeg with input video file={video}, output image folder={images}, fps={fps}.")
    # An empty answer counts as "yes": "y" is appended before taking the first character.
    if (input(f"warning! folder '{images}' will be deleted/replaced. continue? (Y/n)").lower().strip()+"y")[:1] != "y":
        sys.exit(1)
    try:
        shutil.rmtree(args.images)
    except FileNotFoundError:
        # Nothing to replace; any other failure is a real problem and propagates.
        pass
    # Create the folder directly instead of going through a shell `mkdir`.
    os.makedirs(args.images)

    time_slice_value = ""
    if args.time_slice:
        start, end = args.time_slice.split(",")
        # The commas inside between() must reach ffmpeg backslash-escaped; the
        # backslash is doubled here so Python does not see an invalid "\," escape.
        time_slice_value = f",select='between(t\\,{start}\\,{end})'"
    do_system(f"ffmpeg -i {video} -qscale:v 1 -qmin 1 -vf \"fps={fps}{time_slice_value}\" {images}/%04d.jpg")
| |
|
def run_colmap(args):
    """Run the COLMAP pipeline on the image folder and export a text model.

    Runs feature extraction, matching, mapping, bundle adjustment and the
    conversion of model ``0`` to TXT. The database file and the sparse/text
    folders are deleted and recreated after an interactive confirmation.

    Args:
        args: parsed options; reads ``colmap_db``, ``images``, ``text``,
            ``colmap_camera_model``, ``colmap_camera_params``,
            ``colmap_matcher`` and ``vocab_path``. ``args.text`` may be
            rewritten (see the note below).

    Raises:
        SystemExit: when the user declines the prompt or a command fails.
    """
    db = args.colmap_db
    images = "\"" + args.images + "\""
    db_noext = str(Path(db).with_suffix(""))

    # NOTE(review): only the literal value "text" is replaced by a name derived
    # from the database; confirm whether other defaults were meant to match too.
    if args.text == "text":
        args.text = db_noext + "_text"
    text = args.text
    sparse = db_noext + "_sparse"
    print(f"running colmap with:\n\tdb={db}\n\timages={images}\n\tsparse={sparse}\n\ttext={text}")
    # An empty answer counts as "yes": "y" is appended before taking the first character.
    if (input(f"warning! folders '{sparse}' and '{text}' will be deleted/replaced. continue? (Y/n)").lower().strip()+"y")[:1] != "y":
        sys.exit(1)
    if os.path.exists(db):
        os.remove(db)

    # Quote every path handed to the shell (the image path already was) so
    # that locations containing spaces keep working.
    db_q = "\"" + db + "\""
    sparse_q = "\"" + sparse + "\""
    sparse0_q = "\"" + sparse + "/0\""
    text_q = "\"" + text + "\""

    do_system(f"colmap feature_extractor --ImageReader.camera_model {args.colmap_camera_model} --ImageReader.camera_params \"{args.colmap_camera_params}\" --SiftExtraction.estimate_affine_shape=true --SiftExtraction.domain_size_pooling=true --ImageReader.single_camera 1 --database_path {db_q} --image_path {images}")
    match_cmd = f"colmap {args.colmap_matcher}_matcher --SiftMatching.guided_matching=true --database_path {db_q}"
    if args.vocab_path:
        match_cmd += f" --VocabTreeMatching.vocab_tree_path \"{args.vocab_path}\""
    do_system(match_cmd)

    try:
        shutil.rmtree(sparse)
    except FileNotFoundError:
        pass  # nothing to replace
    os.makedirs(sparse)
    do_system(f"colmap mapper --database_path {db_q} --image_path {images} --output_path {sparse_q}")
    do_system(f"colmap bundle_adjuster --input_path {sparse0_q} --output_path {sparse0_q} --BundleAdjustment.refine_principal_point 1")

    try:
        shutil.rmtree(text)
    except FileNotFoundError:
        pass  # nothing to replace
    os.makedirs(text)
    do_system(f"colmap model_converter --input_path {sparse0_q} --output_path {text_q} --output_type TXT")
| |
|
def variance_of_laplacian(image):
    """Return the variance of the 64-bit float Laplacian of ``image``."""
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()
| |
|
def sharpness(imagePath):
    """Return a sharpness score for the image stored at ``imagePath``.

    The image is loaded with OpenCV, converted to grayscale and scored with
    ``variance_of_laplacian``.

    Args:
        imagePath: path of the image file to read.

    Returns:
        The value returned by ``variance_of_laplacian`` for the grayscale image.

    Raises:
        OSError: when OpenCV cannot read the file.
    """
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise OSError(f"could not read image '{imagePath}' (missing or unreadable)")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return variance_of_laplacian(gray)
| |
|
def qvec2rotmat(qvec):
    """Convert a 4-component quaternion into a 3x3 matrix.

    ``qvec[0]`` is used as the scalar part and ``qvec[1:4]`` as the vector
    part. The input is not normalised here.

    Args:
        qvec: indexable with at least four numeric components.

    Returns:
        A 3x3 ``numpy.ndarray``.
    """
    qw, qx, qy, qz = qvec[0], qvec[1], qvec[2], qvec[3]

    row_x = [
        1 - 2 * qy**2 - 2 * qz**2,
        2 * qx * qy - 2 * qw * qz,
        2 * qz * qx + 2 * qw * qy,
    ]
    row_y = [
        2 * qx * qy + 2 * qw * qz,
        1 - 2 * qx**2 - 2 * qz**2,
        2 * qy * qz - 2 * qw * qx,
    ]
    row_z = [
        2 * qz * qx - 2 * qw * qy,
        2 * qy * qz + 2 * qw * qx,
        1 - 2 * qx**2 - 2 * qy**2,
    ]
    return np.array([row_x, row_y, row_z])
| |
|
def rotmat(a, b):
    """Return a 3x3 rotation matrix that turns the direction of ``a`` onto ``b``.

    Both inputs are normalised first, so only their directions matter.

    Args:
        a: source 3-vector (array-like, non-zero).
        b: target 3-vector (array-like, non-zero).

    Returns:
        A 3x3 ``numpy.ndarray`` ``R`` with ``R @ a/|a|`` approximately ``b/|b|``.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    a, b = a / np.linalg.norm(a), b / np.linalg.norm(b)
    c = np.dot(a, b)

    if c < -1 + 1e-10:
        # a and b point in opposite directions, so the cross product vanishes
        # and the general formula below is undefined. Instead of retrying with
        # a randomly perturbed vector (non-deterministic and only approximate),
        # rotate by 180 degrees about an axis perpendicular to a.
        helper = np.zeros(3)
        helper[np.argmin(np.abs(a))] = 1.0  # coordinate axis least aligned with a
        axis = np.cross(a, helper)
        axis /= np.linalg.norm(axis)
        return 2.0 * np.outer(axis, axis) - np.eye(3)

    v = np.cross(a, b)
    s = np.linalg.norm(v)
    # Skew-symmetric cross-product matrix of v.
    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    # The 1e-10 keeps the division finite when a and b are parallel (s == 0).
    return np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2 + 1e-10))
| |
|
def closest_point_2_lines(oa, da, ob, db):
    """Return a point between two lines and a weight for that estimate.

    Each line is given by an origin (``oa``/``ob``) and a direction
    (``da``/``db``); the directions are normalised internally. The parameters
    along both lines are clamped so they are never positive.

    Args:
        oa: origin of the first line (numpy 3-vector).
        da: direction of the first line.
        ob: origin of the second line (numpy 3-vector).
        db: direction of the second line.

    Returns:
        A tuple ``(point, denom)``: ``point`` is the midpoint of the two
        selected points, one on each line, and ``denom`` is the squared norm
        of the cross product of the unit directions (0 for parallel lines).
    """
    dir_a = da / np.linalg.norm(da)
    dir_b = db / np.linalg.norm(db)
    normal = np.cross(dir_a, dir_b)
    denom = np.linalg.norm(normal) ** 2
    offset = ob - oa

    # The small constant keeps the division finite for parallel lines.
    divisor = denom + 1e-10
    # min(value, 0) replaces any positive parameter by 0 and leaves the rest untouched.
    ta = min(np.linalg.det([offset, dir_b, normal]) / divisor, 0)
    tb = min(np.linalg.det([offset, dir_a, normal]) / divisor, 0)

    midpoint = (oa + ta * dir_a + ob + tb * dir_b) * 0.5
    return midpoint, denom
| |
|
if __name__ == "__main__":
    # Script entry point: optionally run ffmpeg and COLMAP, then read
    # cameras.txt and images.txt from the COLMAP text folder and write the
    # camera intrinsics and per-image camera-to-world matrices as JSON.
    args = parse_args()
    if args.video_in != "":
        run_ffmpeg(args)
    if args.run_colmap:
        run_colmap(args)
    AABB_SCALE = int(args.aabb_scale)
    SKIP_EARLY = int(args.skip_early)
    IMAGE_FOLDER = args.images
    TEXT_FOLDER = args.text
    OUT_PATH = args.out
    print(f"outputting to {OUT_PATH}...")

    # ---- camera intrinsics -------------------------------------------------
    camera_found = False
    with open(os.path.join(TEXT_FOLDER,"cameras.txt"), "r") as f:
        angle_x = math.pi / 2
        for line in f:
            line = line.strip()
            # Skip comments and blank lines (a blank line used to crash the parser).
            if not line or line.startswith("#"):
                continue
            # Fields: id, model, width, height, then model-specific parameters.
            # If several cameras are listed, the values of the last one are kept.
            els = line.split(" ")
            w = float(els[2])
            h = float(els[3])
            fl_x = float(els[4])
            fl_y = float(els[4])
            k1 = 0
            k2 = 0
            p1 = 0
            p2 = 0
            cx = w / 2
            cy = h / 2
            if els[1] == "SIMPLE_PINHOLE":
                cx = float(els[5])
                cy = float(els[6])
            elif els[1] == "PINHOLE":
                fl_y = float(els[5])
                cx = float(els[6])
                cy = float(els[7])
            elif els[1] == "SIMPLE_RADIAL":
                cx = float(els[5])
                cy = float(els[6])
                k1 = float(els[7])
            elif els[1] == "RADIAL":
                cx = float(els[5])
                cy = float(els[6])
                k1 = float(els[7])
                k2 = float(els[8])
            elif els[1] == "OPENCV":
                fl_y = float(els[5])
                cx = float(els[6])
                cy = float(els[7])
                k1 = float(els[8])
                k2 = float(els[9])
                p1 = float(els[10])
                p2 = float(els[11])
            else:
                print("unknown camera model ", els[1])

            # Field of view from focal length: fl = 0.5 * size / tan(0.5 * angle).
            angle_x = math.atan(w / (fl_x * 2)) * 2
            angle_y = math.atan(h / (fl_y * 2)) * 2
            fovx = angle_x * 180 / math.pi
            fovy = angle_y * 180 / math.pi
            camera_found = True

    if not camera_found:
        # Without this check the code below fails with a NameError on the
        # first intrinsic it touches.
        print("FATAL: no camera found in cameras.txt")
        sys.exit(1)

    print(f"camera:\n\tres={w,h}\n\tcenter={cx,cy}\n\tfocal={fl_x,fl_y}\n\tfov={fovx,fovy}\n\tk={k1,k2} p={p1,p2} ")

    # ---- per-image poses ---------------------------------------------------
    with open(os.path.join(TEXT_FOLDER,"images.txt"), "r") as f:
        i = 0
        bottom = np.array([0.0, 0.0, 0.0, 1.0]).reshape([1, 4])
        out = {
            "camera_angle_x": angle_x,
            "camera_angle_y": angle_y,
            "fl_x": fl_x,
            "fl_y": fl_y,
            "k1": k1,
            "k2": k2,
            "p1": p1,
            "p2": p2,
            "cx": cx,
            "cy": cy,
            "w": w,
            "h": h,
            "aabb_scale": AABB_SCALE,
            "frames": [],
        }

        up = np.zeros(3)
        # The folder does not change between images; resolve it once.
        image_rel = os.path.relpath(IMAGE_FOLDER)
        for line in f:
            line = line.strip()
            if line.startswith("#"):
                continue
            # Data lines come in pairs: image header, then its 2D points. The
            # points line may legitimately be empty and must still be counted,
            # whereas a blank line where a header is expected (for example a
            # trailing newline) is ignored so the pairing stays intact.
            if not line and i % 2 == 0:
                continue
            i = i + 1
            if i < SKIP_EARLY*2:
                continue
            if i % 2 == 1:
                # Header fields: 0 id, 1-4 quaternion, 5-7 translation, 9+ file name.
                elems = line.split(" ")
                # NOTE(review): a file name containing spaces is re-joined with
                # "_" rather than " " — confirm this matches the files on disk.
                name = str(f"./{image_rel}/{'_'.join(elems[9:])}")
                b = sharpness(name)
                print(name, "sharpness=",b)
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                R = qvec2rotmat(-qvec)
                t = tvec.reshape([3,1])
                # Assemble the 4x4 matrix [R | t; 0 0 0 1] and invert it.
                m = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
                c2w = np.linalg.inv(m)
                if not args.keep_colmap_coords:
                    c2w[0:3,2] *= -1  # negate the third and second columns
                    c2w[0:3,1] *= -1
                    c2w = c2w[[1,0,2,3],:]  # swap the first two rows
                    c2w[2,:] *= -1  # negate the third row

                    up += c2w[0:3,1]

                frame = {"file_path":name,"sharpness":b,"transform_matrix": c2w}
                out["frames"].append(frame)
    nframes = len(out["frames"])

    if args.keep_colmap_coords:
        flip_mat = np.array([
            [1, 0, 0, 0],
            [0, -1, 0, 0],
            [0, 0, -1, 0],
            [0, 0, 0, 1]
        ])

        for frame in out["frames"]:
            frame["transform_matrix"] = np.matmul(frame["transform_matrix"], flip_mat)
    elif nframes == 0:
        # Nothing to reorient; the averaging below would divide by zero.
        print("WARNING: no frames were read from images.txt; skipping scene reorientation")
    else:
        # Rotate the scene so that the accumulated "up" vector becomes +z.
        up = up / np.linalg.norm(up)
        print("up vector was", up)
        R = rotmat(up,[0,0,1])
        R = np.pad(R,[0,1])  # extend the 3x3 rotation to 4x4
        R[-1, -1] = 1

        for frame in out["frames"]:
            frame["transform_matrix"] = np.matmul(R, frame["transform_matrix"])

        # Weighted average of the closest points between every pair of
        # camera lines (position = column 3, direction = column 2).
        print("computing center of attention...")
        totw = 0.0
        totp = np.array([0.0, 0.0, 0.0])
        for frame in out["frames"]:
            mf = frame["transform_matrix"][0:3,:]
            for other in out["frames"]:
                mg = other["transform_matrix"][0:3,:]
                # Named "weight" so the image width "w" is not overwritten.
                p, weight = closest_point_2_lines(mf[:,3], mf[:,2], mg[:,3], mg[:,2])
                if weight > 0.00001:
                    totp += p*weight
                    totw += weight
        if totw > 0.0:
            totp /= totw
        print(totp)
        # Move that point to the origin.
        for frame in out["frames"]:
            frame["transform_matrix"][0:3,3] -= totp

        avglen = 0.
        for frame in out["frames"]:
            avglen += np.linalg.norm(frame["transform_matrix"][0:3,3])
        avglen /= nframes
        print("avg camera distance from origin", avglen)
        if avglen > 0:
            # Rescale so the average camera distance from the origin is 4.
            for frame in out["frames"]:
                frame["transform_matrix"][0:3,3] *= 4.0 / avglen

    # numpy arrays are not JSON serialisable; convert them to nested lists.
    for frame in out["frames"]:
        frame["transform_matrix"] = frame["transform_matrix"].tolist()
    print(nframes,"frames")
    print(f"writing {OUT_PATH}")
    with open(OUT_PATH, "w") as outfile:
        json.dump(out, outfile, indent=2)
| |
|