File size: 13,453 Bytes

b27cd24

# batch_line_visibility.py
import os
import csv
import json
import numpy as np
import cv2

from eval_gpt import load_all_json_recursive_with_paths  # same as your attached script
from identify_queue_start_end import identify_start_end_bboxes, load_fpx_from_txt


# Root folder of the ground-truth JSON files (passed to
# load_all_json_recursive_with_paths in main()).
GT_FOLDER = "/scratch/ds5725/linefinder/LineFinder/GT_json"
# Root folder of the input images; walked recursively by get_images_with_gt().
IMG_FOLDER = "/scratch/ds5725/linefinder/LineFinder/Images"

# Per-image depth maps, looked up as "<image_id>.npy".
DEPTH_DIR = "/scratch/ds5725/linefinder/LineFinder/depth_map"
# Per-image detections, looked up as "<image_id>_bboxes.npy" and
# "<image_id>_orient.npy".
BBOX_ORIENT_DIR = "/scratch/ds5725/linefinder/LineFinder/bbox_orient"
# Text file handed to load_fpx_from_txt() together with the image id to obtain
# the focal length for that image.
FOCAL_TXT = "/scratch/ds5725/linefinder/LineFinder/focal_length_px.txt"

def flatten_and_fix_gt(gt_entry: dict) -> dict:
    """
    Flatten one nested ground-truth entry and repair inconsistent fields.

    Input GT format (nested), for side in {"end", "start"}:
      gt_entry["<side>_of_line"]["visible"]
      gt_entry["<side>_of_line"]["location_if_visible"]
      gt_entry["<side>_of_line"]["direction_to_turn_if_not_visible"]
    Output (flat, matching prediction keys):
      <side>_of_line_visible
      <side>_of_line_location_if_visible
      direction_to_turn_to_see_<side>_if_not_visible

    Rules enforced per side:
      visible == "yes" => turn = "N/A"; location kept only if it is one of
                          far left / center left / center / center right /
                          far right, otherwise "N/A"
      visible == "no"  => location = "N/A"; turn kept only if it is
                          left / right, otherwise "N/A"
      anything else    => visible = ""; location and turn are left as read
                          (only N/A spellings are canonicalized)

    All values are stripped and lower-cased, except the "N/A" marker, which is
    always emitted in upper case. Missing keys, JSON nulls (None), a non-dict
    side section, or a non-dict ``gt_entry`` are all treated as "not provided"
    rather than being stringified or raising.
    """
    valid_locs = {"far left", "center left", "center", "center right", "far right"}
    valid_turns = {"left", "right"}
    na_spellings = ("n/a", "na", "")

    def read(side: str, key: str, default: str) -> str:
        # Fetch one nested field as a stripped, lower-cased string.
        section = gt_entry.get(f"{side}_of_line", {}) if isinstance(gt_entry, dict) else None
        if not isinstance(section, dict):
            value = default
        else:
            value = section.get(key, default)
            if value is None:
                # A JSON null means "not provided"; str(None) would otherwise
                # leak the bogus label "none" into the output.
                value = default
        return str(value).strip().lower()

    flat = {}
    for side in ("end", "start"):
        vis = read(side, "visible", "")
        loc = read(side, "location_if_visible", "N/A")
        turn = read(side, "direction_to_turn_if_not_visible", "N/A")

        if vis == "yes":
            turn = "N/A"
            if loc not in valid_locs:
                loc = "N/A"
        elif vis == "no":
            loc = "N/A"
            if turn not in valid_turns:
                turn = "N/A"
        else:
            # Unknown visibility: report it as empty and do not touch loc/turn.
            vis = ""

        # Canonical casing: every spelling of "not applicable" becomes "N/A".
        if loc in na_spellings:
            loc = "N/A"
        if turn in na_spellings:
            turn = "N/A"

        flat[f"{side}_of_line_visible"] = vis
        flat[f"{side}_of_line_location_if_visible"] = loc
        flat[f"direction_to_turn_to_see_{side}_if_not_visible"] = turn

    return flat

def normalize_visibility_fields(gt: dict) -> dict:
    """
    Return a repaired shallow copy of a flat GT dict; the input is not modified.

    For each side ("end", then "start"):
      - visibility that is not yes/no (after strip + lower-case) leaves all
        three fields of that side untouched;
      - visible "yes": turn becomes "N/A"; location is kept (lower-cased) only
        if it is one of the five known buckets, otherwise "N/A";
      - visible "no": location becomes "N/A"; turn is kept (lower-cased) only
        if it is "left" or "right", otherwise "N/A".
    """
    allowed_locations = frozenset(
        ("far left", "center left", "center", "center right", "far right")
    )
    allowed_turns = frozenset(("left", "right"))

    fixed = dict(gt)  # shallow copy so the caller's dict stays intact

    def cleaned(key: str, allowed: frozenset) -> str:
        # Lower-cased value of `key` if it is permitted, else the N/A marker.
        candidate = str(fixed.get(key, "N/A")).strip().lower()
        return candidate if candidate in allowed else "N/A"

    for side in ("end", "start"):
        visible_key = f"{side}_of_line_visible"
        location_key = f"{side}_of_line_location_if_visible"
        turn_key = f"direction_to_turn_to_see_{side}_if_not_visible"

        seen = str(fixed.get(visible_key, "")).strip().lower()

        if seen == "yes":
            fixed[visible_key] = "yes"
            fixed[turn_key] = "N/A"
            fixed[location_key] = cleaned(location_key, allowed_locations)
        elif seen == "no":
            fixed[visible_key] = "no"
            fixed[location_key] = "N/A"
            fixed[turn_key] = cleaned(turn_key, allowed_turns)
        # any other visibility value: leave this side exactly as it was

    return fixed

def get_images_with_gt(img_folder, gt_keys):
    """
    Collect image files under ``img_folder`` (recursively) that have a GT entry.

    A file qualifies when its extension, compared case-insensitively, is one of
    .jpg/.jpeg/.png/.webp/.gif and its basename without the extension is
    contained in ``gt_keys``. Returns the full paths in os.walk order.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".webp", ".gif")
    hits = []
    for folder, _subdirs, names in os.walk(img_folder):
        hits.extend(
            os.path.join(folder, name)
            for name in names
            if name.lower().endswith(image_suffixes)
            and os.path.splitext(name)[0] in gt_keys
        )
    return hits


def _bbox_edge_flags(bbox_xyxy, W, H, margin_px):
    """
    Report which image borders a box lies within ``margin_px`` of.

    ``bbox_xyxy`` must expose ``.tolist()`` yielding (x1, y1, x2, y2); W and H
    are the image width and height. Returns the tuple
    (touches_any, near_left, near_right, near_top, near_bottom).
    """
    left, top, right, bottom = map(float, bbox_xyxy.tolist())

    # The last valid pixel index is W-1 / H-1, hence the "- 1" on the far sides.
    hits_left = left <= margin_px
    hits_right = right >= (W - 1 - margin_px)
    hits_top = top <= margin_px
    hits_bottom = bottom >= (H - 1 - margin_px)

    hits_any = hits_left or hits_right or hits_top or hits_bottom
    return (hits_any, hits_left, hits_right, hits_top, hits_bottom)


def _location_bucket_from_center_x(cx, W):
    """
    Map a horizontal position to one of five equal-width location labels.

    The ratio cx / max(W, 1) is compared against 0.2, 0.4, 0.6 and 0.8 (upper
    bounds are exclusive); anything at or beyond 0.8 is "far right".
    """
    ratio = cx / max(W, 1)
    upper_bounds = (
        (0.2, "far left"),
        (0.4, "center left"),
        (0.6, "center"),
        (0.8, "center right"),
    )
    for bound, label in upper_bounds:
        if ratio < bound:
            return label
    return "far right"


def endpoint_fields(bbox_xyxy, W, H, margin_px):
    """
    Derive (visible, location, turn) for one queue endpoint from its box.

      - A box within ``margin_px`` of any image edge counts as not visible:
        returns ("no", "N/A", turn) where turn is "left" if the box is near
        the left edge and "right" otherwise.
      - Otherwise the endpoint is visible: returns ("yes", location, "N/A")
        where location is the bucket of the box's horizontal center.
    """
    left, _top, right, _bottom = (float(c) for c in bbox_xyxy.tolist())

    edge_flags = _bbox_edge_flags(bbox_xyxy, W, H, margin_px)
    at_any_edge, at_left_edge = edge_flags[0], edge_flags[1]

    if not at_any_edge:
        center_x = 0.5 * (left + right)
        return "yes", _location_bucket_from_center_x(center_x, W), "N/A"

    return "no", "N/A", ("left" if at_left_edge else "right")


def process_one_image(img_path, gt_entry, margin_px=10):
    """
    Predict start/end visibility fields for a single image.

    Returns ``((pred, gt), "ok")`` on success, or ``(None, reason)`` where
    reason is one of "missing:<path>", "missing-image", "missing-fpx:<error>"
    or "fail-identify:<error>".

    ``pred`` holds image_id, image_path and the six visibility fields;
    ``gt`` is the subset of those six fields present in ``gt_entry`` (empty if
    ``gt_entry`` is not a dict).
    """
    field_names = (
        "end_of_line_visible",
        "end_of_line_location_if_visible",
        "direction_to_turn_to_see_end_if_not_visible",
        "start_of_line_visible",
        "start_of_line_location_if_visible",
        "direction_to_turn_to_see_start_if_not_visible",
    )

    image_id = os.path.splitext(os.path.basename(img_path))[0]

    # Per-image inputs produced by earlier pipeline stages.
    depth_path = os.path.join(DEPTH_DIR, f"{image_id}.npy")
    bbox_path = os.path.join(BBOX_ORIENT_DIR, f"{image_id}_bboxes.npy")
    orient_path = os.path.join(BBOX_ORIENT_DIR, f"{image_id}_orient.npy")

    # Bail out on the first required file that does not exist.
    needed = (depth_path, bbox_path, orient_path, FOCAL_TXT)
    absent = next((p for p in needed if not os.path.isfile(p)), None)
    if absent is not None:
        return None, f"missing:{absent}"

    # The image itself is only needed for its dimensions.
    image = cv2.imread(img_path)
    if image is None:
        return None, "missing-image"
    height, width = image.shape[:2]

    try:
        focal_px = load_fpx_from_txt(FOCAL_TXT, image_id)
    except Exception as err:
        return None, f"missing-fpx:{err}"

    try:
        detection = identify_start_end_bboxes(
            image_path=img_path,
            depth_npy_path=depth_path,
            bboxes_npy_path=bbox_path,
            orient_npy_path=orient_path,
            f_px=focal_px,
        )
    except Exception as err:
        return None, f"fail-identify:{err}"

    head_bbox = detection["start_bbox_xyxy"]  # START = head of the line
    tail_bbox = detection["end_bbox_xyxy"]    # END = tail of the line

    end_vis, end_loc, end_turn = endpoint_fields(tail_bbox, width, height, margin_px)
    start_vis, start_loc, start_turn = endpoint_fields(head_bbox, width, height, margin_px)

    pred = {"image_id": image_id, "image_path": img_path}
    pred.update(
        zip(field_names, (end_vis, end_loc, end_turn, start_vis, start_loc, start_turn))
    )

    # Carry over whichever GT fields the caller supplied.
    gt = {}
    if isinstance(gt_entry, dict):
        gt = {name: gt_entry[name] for name in field_names if name in gt_entry}

    return (pred, gt), "ok"


def main():
    """
    Run the visibility heuristic over every image that has a GT JSON.

    Steps:
      1. Load all GT JSONs from GT_FOLDER and find the images in IMG_FOLDER
         whose basename matches a GT key.
      2. For each image, flatten/repair its GT entry, run process_one_image()
         and record one CSV row (failed images get only id, path and status).
      3. Score each of the six fields wherever the GT value is non-empty, and
         print a "[VIS ERROR]" line for every image whose start or end
         visibility prediction disagrees with GT.
      4. Print per-field accuracy and the failure count, then write all rows
         to "line_visibility_results.csv" in the current working directory.
    """
    # Single source of truth for the fields that are scored.
    scored_fields = (
        "end_of_line_visible",
        "end_of_line_location_if_visible",
        "direction_to_turn_to_see_end_if_not_visible",
        "start_of_line_visible",
        "start_of_line_location_if_visible",
        "direction_to_turn_to_see_start_if_not_visible",
    )

    # The loader also returns the JSON paths; they are not needed here.
    gt_dict, _ = load_all_json_recursive_with_paths(GT_FOLDER)
    gt_keys = set(gt_dict)
    print(f"Loaded {len(gt_keys)} GT JSONs.")

    image_paths = get_images_with_gt(IMG_FOLDER, gt_keys)
    print(f"Found {len(image_paths)} images that have GT JSONs.")

    margin_px = 10  # tweak if needed

    rows = []
    correct = dict.fromkeys(scored_fields, 0)
    total = dict.fromkeys(scored_fields, 0)
    failures = 0

    for img_path in image_paths:
        image_id = os.path.splitext(os.path.basename(img_path))[0]

        # 1) Build FLAT + FIXED GT (nested -> flat, and enforce N/A rules)
        gt_flat = flatten_and_fix_gt(gt_dict.get(image_id, {}))

        # 2) Run prediction; gt_flat is passed through and returned as `gt`
        out, status = process_one_image(img_path, gt_flat, margin_px=margin_px)
        if status != "ok":
            failures += 1
            rows.append({
                "image_id": image_id,
                "image_path": img_path,
                "status": status,
            })
            print(f"[WARN] {image_id}: {status}")
            continue

        pred, gt = out

        row = dict(pred)
        row["status"] = "ok"

        # Write GT columns (only non-empty values)
        for k, v in gt.items():
            if v != "" and v is not None:
                row[f"gt_{k}"] = v

        # Score only the fields for which GT provides a value
        for k in scored_fields:
            if gt.get(k, "") != "":
                total[k] += 1
                if str(pred[k]).strip().lower() == str(gt[k]).strip().lower():
                    correct[k] += 1

        rows.append(row)

        # Print only incorrect visibility cases
        end_vis_wrong = (
            gt.get("end_of_line_visible", "") != "" and
            pred["end_of_line_visible"] != gt["end_of_line_visible"]
        )
        start_vis_wrong = (
            gt.get("start_of_line_visible", "") != "" and
            pred["start_of_line_visible"] != gt["start_of_line_visible"]
        )

        if end_vis_wrong or start_vis_wrong:
            end_loc = pred["end_of_line_location_if_visible"] if pred["end_of_line_visible"] == "yes" else "N/A"
            start_loc = pred["start_of_line_location_if_visible"] if pred["start_of_line_visible"] == "yes" else "N/A"

            print(
                f"[VIS ERROR] {image_id} | "
                f"end: pred={pred['end_of_line_visible']} "
                f"(loc={end_loc}) gt={gt.get('end_of_line_visible')} "
                f"start: pred={pred['start_of_line_visible']} "
                f"(loc={start_loc}) gt={gt.get('start_of_line_visible')} "
            )

    # Print accuracies
    print("\n=== Accuracy (only where GT field exists) ===")
    for k in scored_fields:
        if total[k] == 0:
            print(f"{k}: N/A (no GT)")
        else:
            acc = correct[k] / total[k]
            print(f"{k}: {acc:.4f} ({correct[k]}/{total[k]})")

    print(f"\nFailures: {failures}/{len(image_paths)}")

    # Save CSV
    out_csv = "line_visibility_results.csv"
    # Rows have different key sets (failed vs. ok, optional gt_* columns), so
    # the header is the sorted union of all keys; DictWriter leaves missing
    # cells empty.
    all_cols = set()
    for r in rows:
        all_cols.update(r.keys())
    all_cols = sorted(all_cols)

    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=all_cols)
        w.writeheader()
        w.writerows(rows)

    print(f"\nSaved: {out_csv}")


if __name__ == "__main__":
    main()