Spaces:

AGLO-AI
/

raster2seq

Runtime error

anas committed on Apr 6

Commit

fadb92b

1 Parent(s): 54d83cd

Initial deployment of Raster2Seq floor plan vectorization API

- Full inference pipeline with Gradio UI and API endpoint
- Docker-based deployment with CUDA 11.8 for GPU inference
- Semantic room detection with polygon output (13 room types)
- Checkpoint auto-downloaded during Docker build

Made-with: Cursor

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +15 -0
Dockerfile +27 -0
LICENSE +21 -0
README.md +24 -5
app.py +303 -0
data_preprocess/README.md +40 -0
data_preprocess/common_utils.py +45 -0
data_preprocess/cubicasa5k/augmentations.py +703 -0
data_preprocess/cubicasa5k/combine_json.py +118 -0
data_preprocess/cubicasa5k/create_coco_cc5k.py +672 -0
data_preprocess/cubicasa5k/floorplan_extraction.py +403 -0
data_preprocess/cubicasa5k/house.py +1131 -0
data_preprocess/cubicasa5k/loaders.py +158 -0
data_preprocess/cubicasa5k/plotting.py +820 -0
data_preprocess/cubicasa5k/run.sh +15 -0
data_preprocess/cubicasa5k/svg_utils.py +746 -0
data_preprocess/raster2graph/combine_json.py +122 -0
data_preprocess/raster2graph/combine_mapping_ids.py +95 -0
data_preprocess/raster2graph/convert_to_coco.py +472 -0
data_preprocess/raster2graph/dataset.py +296 -0
data_preprocess/raster2graph/image_process.py +67 -0
data_preprocess/raster2graph/util/data_utils.py +966 -0
data_preprocess/raster2graph/util/edges_utils.py +46 -0
data_preprocess/raster2graph/util/geom_utils.py +124 -0
data_preprocess/raster2graph/util/graph_utils.py +879 -0
data_preprocess/raster2graph/util/image_id_dict.py +0 -0
data_preprocess/raster2graph/util/math_utils.py +7 -0
data_preprocess/raster2graph/util/mean_std.py +2 -0
data_preprocess/raster2graph/util/metric_utils.py +338 -0
data_preprocess/raster2graph/util/semantics_dict.py +45 -0
data_preprocess/stru3d/PointCloudReaderPanorama.py +253 -0
data_preprocess/stru3d/generate_coco_stru3d.py +199 -0
data_preprocess/stru3d/generate_point_cloud_stru3d.py +32 -0
data_preprocess/stru3d/stru3d_utils.py +244 -0
data_preprocess/tools/plot_data.sh +60 -0
data_preprocess/tools/run_cc5k.sh +15 -0
data_preprocess/tools/run_r2g.sh +12 -0
data_preprocess/tools/run_s3d.sh +22 -0
data_preprocess/tools/run_waffle.sh +3 -0
data_preprocess/waffle/create_coco_waffle_benchmark.py +290 -0
datasets/__init__.py +67 -0
datasets/data_utils.py +60 -0
datasets/discrete_tokenizer.py +60 -0
datasets/poly_data.py +590 -0
datasets/room_dropout.py +237 -0
datasets/transforms.py +46 -0
detectron2/__init__.py +10 -0
detectron2/checkpoint/__init__.py +11 -0
detectron2/checkpoint/c2_model_loading.py +387 -0
detectron2/checkpoint/catalog.py +113 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,15 @@

+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+build/
+dist/
+data/
+data2/
+output*/
+wandb*/
+checkpoints/
+slurm_scripts*
+watch_folder
+cross_eval_out
+*.log

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
+RUN apt-get update && apt-get install -y \
+    python3.10 python3-pip git wget libgl1-mesa-glx libglib2.0-0 \
+    && rm -rf /var/lib/apt/lists/*
+RUN useradd -m -u 1000 user
+WORKDIR /app
+COPY . /app
+RUN pip3 install torch==2.3.1 torchvision==0.18.1 \
+    --index-url https://download.pytorch.org/whl/cu118
+RUN pip3 install -r requirements.txt
+RUN pip3 install gradio gdown
+RUN cd models/ops && sh make.sh && cd ../..
+RUN cd diff_ras && python3 setup.py build develop && cd ..
+RUN mkdir -p checkpoints && \
+    gdown --fuzzy "https://drive.google.com/file/d/1M32HlYwXw-4Q_uajSCvpbF31UFPzQVHP/view?usp=sharing" \
+    -O checkpoints/r2g_res256_ep0849.pth
+RUN chown -R user:user /app
+USER user
+EXPOSE 7860
+CMD ["python3", "app.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2026 Raster2Seq
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,10 +1,29 @@
 ---
-title: Raster2seq
-emoji: 🐢
-colorFrom: yellow
-colorTo: gray
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Raster2Seq
+emoji: 🏠
+colorFrom: blue
+colorTo: purple
 sdk: docker
 pinned: false
+app_port: 7860
 ---
+# Raster2Seq - Floor Plan Vectorization
+Upload a floor plan image to detect room polygons and their semantic labels.
+This Space runs the Raster2Seq model for converting raster floor plan images into vectorized polygon sequences with room type classification.
+## API Usage
+This Space exposes a Gradio API. You can call it programmatically:
+```python
+from gradio_client import Client
+client = Client("AGLO-AI/raster2seq")
+result = client.predict(
+    image="path/to/floorplan.png",
+    api_name="/predict"
+)
+```

app.py ADDED Viewed

	@@ -0,0 +1,303 @@

+import argparse
+import copy
+import json
+import math
+import cv2
+import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+from shapely.geometry import Polygon
+from datasets.discrete_tokenizer import DiscreteTokenizer
+from datasets.transforms import ResizeAndPad
+from detectron2.data import transforms as T
+from models import build_model
+from util.plot_utils import plot_semantic_rich_floorplan_opencv
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # inference device: CUDA when available, otherwise CPU
+MODEL_ARGS = argparse.Namespace(  # fixed configuration consumed by load_model(), DATA_TRANSFORM and predict_floorplan()
+    poly2seq=True,
+    seq_len=512,  # passed to DiscreteTokenizer in load_model()
+    num_bins=32,  # passed to DiscreteTokenizer in load_model()
+    image_size=256,  # target size for ResizeAndPad; the visualisation canvas is twice this
+    input_channels=3,
+    backbone="resnet50",
+    dilation=False,
+    position_embedding="sine",
+    position_embedding_scale=2 * np.pi,
+    num_feature_levels=4,
+    enc_layers=6,
+    dec_layers=6,
+    dim_feedforward=1024,
+    hidden_dim=256,
+    dropout=0.1,
+    nheads=8,
+    num_queries=800,
+    num_polys=20,
+    dec_n_points=4,
+    enc_n_points=4,
+    query_pos_type="sine",
+    with_poly_refine=False,
+    masked_attn=False,
+    semantic_classes=13,  # predict_floorplan() requests semantic output when this is > 0
+    aux_loss=False,
+    dec_attn_concat_src=True,
+    pre_decoder_pos_embed=False,
+    learnable_dec_pe=False,
+    dec_qkv_proj=False,
+    per_token_sem_loss=True,  # forwarded to generate(); selects majority voting over per-token labels
+    add_cls_token=False,  # forwarded to DiscreteTokenizer as add_cls
+    use_anchor=True,
+    inject_cls_embed=False,
+    device="cuda" if torch.cuda.is_available() else "cpu",  # same selection as DEVICE, kept as a plain string
+)  # NOTE(review): presumably these values must match the training config of the checkpoint — confirm
+R2G_LABEL = {  # class id -> display name; predict_floorplan() reports ids missing from this table as "Unknown"
+    0: "Living Room",
+    1: "Kitchen",
+    2: "Bedroom",
+    3: "Bathroom",
+    4: "Balcony",
+    5: "Corridor",
+    6: "Dining Room",
+    7: "Study",
+    8: "Studio",
+    9: "Store Room",
+    10: "Garden",
+    11: "Laundry Room",
+    12: "Office",
+    13: "Basement",
+    14: "Garage",
+    15: "Undefined",
+    16: "Door",
+    17: "Window",
+}  # NOTE(review): 18 names here vs. semantic_classes=13 in MODEL_ARGS — confirm this table matches the checkpoint's label set
+def _process_predictions(
+    pred_corners, i, semantic_rich, image_size, pred_room_label,
+    pred_room_logits, per_token_sem_loss, add_cls_token=False,
+):
+    """Extract polygons from poly2seq model output."""
+    np_softmax = lambda x: np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)
+    pred_corners_per_scene = pred_corners[i]
+    room_polys = []
+    if semantic_rich:
+        room_types = []
+        window_doors = []
+        window_doors_types = []
+        pred_room_label_per_scene = pred_room_label[i].cpu().numpy()
+        pred_room_logit_per_scene = pred_room_logits[i].cpu().numpy()
+    all_room_polys = []
+    tmp = []
+    all_length_list = [0]
+    for j in range(len(pred_corners_per_scene)):
+        if isinstance(pred_corners_per_scene[j], int):
+            if pred_corners_per_scene[j] == 2 and tmp:
+                all_room_polys.append(tmp)
+                all_length_list.append(len(tmp) + 1 + add_cls_token)
+                tmp = []
+            continue
+        tmp.append(pred_corners_per_scene[j])
+    if len(tmp):
+        all_room_polys.append(tmp)
+        all_length_list.append(len(tmp) + 1 + add_cls_token)
+    start_poly_indices = np.cumsum(all_length_list)
+    final_pred_classes = []
+    for j, poly in enumerate(all_room_polys):
+        if len(poly) < 2:
+            continue
+        corners = np.array(poly, dtype=np.float32) * (image_size - 1)
+        corners = np.around(corners).astype(np.int32)
+        if not semantic_rich:
+            if len(corners) >= 4 and Polygon(corners).area >= 100:
+                room_polys.append(corners)
+        else:
+            if per_token_sem_loss:
+                pred_classes, counts = np.unique(
+                    pred_room_label_per_scene[start_poly_indices[j]:start_poly_indices[j + 1]][:-1],
+                    return_counts=True,
+                )
+                pred_class = pred_classes[np.argmax(counts)]
+            else:
+                pred_class = pred_room_label_per_scene[start_poly_indices[j + 1] - 1]
+            final_pred_classes.append(pred_class)
+            if len(corners) >= 3 and Polygon(corners).area >= 100:
+                room_polys.append(corners)
+                room_types.append(pred_class)
+            elif len(corners) == 2:
+                window_doors.append(corners)
+                window_doors_types.append(pred_class)
+    if not semantic_rich:
+        pred_room_label_per_scene = len(all_room_polys) * [-1]
+    return {
+        "room_polys": room_polys,
+        "room_types": room_types if semantic_rich else None,
+        "window_doors": window_doors if semantic_rich else None,
+        "window_doors_types": window_doors_types if semantic_rich else None,
+    }
+@torch.no_grad()
+def generate(model, samples, semantic_rich=False, use_cache=True, per_token_sem_loss=False):
+    """Generate room polygons from model predictions (poly2seq mode only)."""
+    model.eval()
+    image_size = samples[0].size(2)
+    outputs = model.forward_inference(samples, use_cache)
+    pred_corners = outputs["gen_out"]
+    bs = outputs["pred_logits"].shape[0]
+    pred_room_label = None
+    pred_room_logits = None
+    if "pred_room_logits" in outputs:
+        pred_room_logits = outputs["pred_room_logits"]
+        prob = torch.nn.functional.softmax(pred_room_logits, -1)
+        _, pred_room_label = prob[..., :-1].max(-1)
+    result_rooms = []
+    result_classes = []
+    for i in range(bs):
+        scene_outputs = _process_predictions(
+            pred_corners, i, semantic_rich, image_size,
+            pred_room_label, pred_room_logits, per_token_sem_loss,
+        )
+        room_polys = scene_outputs["room_polys"]
+        room_types = scene_outputs["room_types"]
+        window_doors = scene_outputs["window_doors"]
+        window_doors_types = scene_outputs["window_doors_types"]
+        if window_doors:
+            result_rooms.append(room_polys + window_doors)
+            result_classes.append(room_types + window_doors_types)
+        else:
+            result_rooms.append(room_polys)
+            result_classes.append(room_types)
+    return {"room": result_rooms, "labels": result_classes}
+def load_model():
+    tokenizer = DiscreteTokenizer(
+        MODEL_ARGS.num_bins, MODEL_ARGS.seq_len, add_cls=MODEL_ARGS.add_cls_token
+    )
+    MODEL_ARGS.vocab_size = len(tokenizer)
+    model = build_model(MODEL_ARGS, train=False, tokenizer=tokenizer)
+    model.to(DEVICE)
+    ckpt_path = "checkpoints/r2g_res256_ep0849.pth"
+    checkpoint = torch.load(ckpt_path, map_location="cpu")
+    ckpt_state_dict = copy.deepcopy(checkpoint["ema"])
+    for key in list(ckpt_state_dict.keys()):
+        if key.startswith("module."):
+            ckpt_state_dict[key[7:]] = ckpt_state_dict.pop(key)
+    model.load_state_dict(ckpt_state_dict, strict=False)
+    for param in model.parameters():
+        param.requires_grad = False
+    model.eval()
+    return model
+print("Loading model...")
+MODEL = load_model()  # built once at import time and reused by every predict_floorplan() call
+print("Model loaded.")
+DATA_TRANSFORM = T.AugmentationList(  # applied to every input in preprocess_image(); target size comes from MODEL_ARGS.image_size
+    [ResizeAndPad((MODEL_ARGS.image_size, MODEL_ARGS.image_size), pad_value=255)]
+)
+def preprocess_image(pil_image: Image.Image) -> torch.Tensor:
+    image_np = np.array(pil_image.convert("RGB"))
+    aug_input = T.AugInput(image_np)
+    DATA_TRANSFORM(aug_input)
+    image_np = aug_input.image
+    if len(image_np.shape) == 2:
+        tensor = np.expand_dims(image_np, 0)
+    else:
+        tensor = image_np.transpose((2, 0, 1))
+    return (1 / 255) * torch.as_tensor(tensor, dtype=torch.float32)
+def predict_floorplan(image: Image.Image):
+    if image is None:
+        return None, json.dumps({"error": "No image provided"})
+    input_tensor = preprocess_image(image).unsqueeze(0).to(DEVICE)
+    outputs = generate(
+        MODEL,
+        input_tensor,
+        semantic_rich=MODEL_ARGS.semantic_classes > 0,
+        use_cache=True,
+        per_token_sem_loss=MODEL_ARGS.per_token_sem_loss,
+    )
+    pred_rooms = outputs["room"][0]
+    pred_labels = outputs["labels"][0]
+    image_size = MODEL_ARGS.image_size
+    if pred_labels is None:
+        pred_labels = [-1] * len(pred_rooms)
+    result_polygons = []
+    for poly, label in zip(pred_rooms, pred_labels):
+        coords = poly.astype(float).tolist()
+        result_polygons.append({
+            "label": R2G_LABEL.get(int(label), "Unknown"),
+            "label_id": int(label),
+            "polygon": coords,
+        })
+    floorplan_map = plot_semantic_rich_floorplan_opencv(
+        zip(pred_rooms, pred_labels),
+        None,
+        door_window_index=[],
+        semantics_label_mapping=R2G_LABEL,
+        plot_text=True,
+        one_color=False,
+        is_sem=True,
+        img_w=image_size * 2,
+        img_h=image_size * 2,
+        scale=2,
+    )
+    if floorplan_map is not None and floorplan_map.size > 0:
+        floorplan_rgb = cv2.cvtColor(floorplan_map, cv2.COLOR_BGR2RGB)
+        vis_image = Image.fromarray(floorplan_rgb)
+    else:
+        vis_image = None
+    return vis_image, result_polygons
+demo = gr.Interface(  # UI/API wrapper around predict_floorplan()
+    fn=predict_floorplan,
+    inputs=gr.Image(type="pil", label="Floor Plan Image"),
+    outputs=[  # order matches predict_floorplan()'s (image, polygons) return pair
+        gr.Image(type="pil", label="Detected Rooms"),
+        gr.JSON(label="Detected Polygons"),
+    ],
+    title="Raster2Seq - Floor Plan Vectorization",
+    description="Upload a floor plan image to detect room polygons and their semantic labels. Returns both a visualization and structured JSON with polygon coordinates.",
+)
+demo.launch(server_name="0.0.0.0", server_port=7860)  # runs at import time; listens on all interfaces, port 7860

data_preprocess/README.md ADDED Viewed

	@@ -0,0 +1,40 @@

+## Data preprocessing
+### Structured3D
+Simply download the data preprocessed by RoomFormer from [here](https://polybox.ethz.ch/index.php/s/wKYWFsQOXHnkwcG). For more details, please refer to [RoomFormer's instructions](https://github.com/ywyue/RoomFormer/tree/main/data_preprocess).
+To render binary floorplan images from GT annotations (as used in our paper), run `bash data_preprocess/tools/run_s3d.sh`.
+### CubiCasa5K
+Step 1: Download and extract [CubiCasa5K](https://zenodo.org/record/2613548) dataset.
+Step 2: Run `bash data_preprocess/cubicasa5k/run.sh`.
+### Raster2Graph
+These instructions mainly follow Raster2Graph's.
+Step 1: Due to dataset proprietary restrictions, please apply for access to LIFULL HOME'S Data [here](https://www.nii.ac.jp/dsc/idr/en/lifull/).
+Step 2: After obtaining access, download only the "photo-rent-madori-full-00" folder, which contains approximately 300,000 images.
+Step 3: Apply for access to the annotation [here](https://docs.google.com/forms/d/e/1FAIpQLSexqNMjyvPMtPMPN7bSh_1u4Q27LZAT-S9lR_gpipNIMKV5lw/viewform).
+The package has 3 folders:
+- annot_npy, annot_json: the annotations saved in npy and json, respectively.
+- original_vector_boundary: boundary boxes of "LIFULL HOME'S Data", which are used to create centered 512x512 images.
+These folders should be saved in the same directory as `photo-rent-madori-full-00`. For example: `data/R2G_hr_dataset/`.
+Step 4: Run `bash data_preprocess/tools/run_r2g.sh`.
+### WAFFLE
+Note that WAFFLE only provides segmentation masks for a subset of 100 examples, so we process only this subset, and only for evaluation, not for training.
+Step 1: Download the data from [here](https://tauex-my.sharepoint.com/:f:/g/personal/hadarelor_tauex_tau_ac_il/EqMX9nRbJ9xFiK7dR_m07b8BldS2saoZ4-ockqncJb_Hrg?e=zGIuos).
+Step 2: Run `bash data_preprocess/tools/run_waffle.sh`.
+## Data visualization
+Please refer to this script [tools/plot_data.sh](tools/plot_data.sh).

data_preprocess/common_utils.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import os
+import cv2
+import numpy as np
+from plyfile import PlyData
+def read_scene_pc(file_path):
+    with open(file_path, "rb") as f:
+        plydata = PlyData.read(f)
+        dtype = plydata["vertex"].data.dtype
+    print("dtype of file{}: {}".format(file_path, dtype))
+    points_data = np.array(plydata["vertex"].data.tolist())
+    return points_data
+def is_clockwise(points):
+    # points is a list of 2d points.
+    assert len(points) > 0
+    s = 0.0
+    for p1, p2 in zip(points, points[1:] + [points[0]]):
+        s += (p2[0] - p1[0]) * (p2[1] + p1[1])
+    return s > 0.0
+def resort_corners(corners):
+    # re-find the starting point and sort corners clockwisely
+    x_y_square_sum = corners[:, 0] ** 2 + corners[:, 1] ** 2
+    start_corner_idx = np.argmin(x_y_square_sum)
+    corners_sorted = np.concatenate([corners[start_corner_idx:], corners[:start_corner_idx]])
+    ## sort points clockwise
+    if not is_clockwise(corners_sorted[:, :2].tolist()):
+        corners_sorted[1:] = np.flip(corners_sorted[1:], 0)
+    return corners
+def export_density(density_map, out_folder, scene_id):
+    density_path = os.path.join(out_folder, scene_id + ".png")
+    density_uint8 = (density_map * 255).astype(np.uint8)
+    cv2.imwrite(density_path, density_uint8)

data_preprocess/cubicasa5k/augmentations.py ADDED Viewed

	@@ -0,0 +1,703 @@

+import random
+from math import inf
+import cv2
+import numpy as np
+import torch
+from floortrans.loaders import svg_utils
+class Compose(object):
+    def __init__(self, augmentations):
+        self.augmentations = augmentations
+    def __call__(self, sample):
+        for a in self.augmentations:
+            sample = a(sample)
+        return sample
+# 0. I
+# 1. I top to right
+# 2. I vertical flip
+# 3. I top to left
+# 4. L horizontal flip
+# 5. L
+# 6. L vertical flip
+# 7. L horizontal and vertical flip
+# 8. T
+# 9. T top to right
+# 10. T top to down
+# 11. T top to left
+# 12. X or +
+# 13. Opening left corner
+# 14. Opening right corner
+# 15. Opening up corner
+# 16. Opening down corner
+# 17. Icon upper left
+# 18. Icon upper right
+# 19. Icon lower left
+# 20. Icon lower right
+class RandomRotations(object):
+    def __init__(self, format="furu"):
+        if format == "furu":
+            self.augment = self.furu
+        elif format == "cubi":
+            self.augment = self.cubi
+    def __call__(self, sample):
+        return self.augment(sample)
+    def cubi(self, sample):
+        fplan = sample["image"]
+        segmentation = sample["label"]
+        heatmap_points = sample["heatmaps"]
+        scale = sample["scale"]
+        num_of_rotations = int(torch.randint(0, 3, (1,)))
+        hmapp_convert_map = {
+            0: 1,
+            1: 2,
+            2: 3,
+            3: 0,
+            4: 5,
+            5: 6,
+            6: 7,
+            7: 4,
+            8: 9,
+            9: 10,
+            10: 11,
+            11: 8,
+            12: 12,
+            13: 15,
+            14: 16,
+            15: 14,
+            16: 13,
+            17: 18,
+            18: 20,
+            19: 17,
+            20: 19,
+        }
+        for i in range(num_of_rotations):
+            fplan = fplan.transpose(2, 1).flip(2)
+            segmentation = segmentation.transpose(2, 1).flip(2)
+            points_rotated = dict()
+            for junction_type, points in heatmap_points.items():
+                new_junction_type = hmapp_convert_map[junction_type]
+                new_heatmap_points = []
+                for point in points:
+                    x = fplan.shape[1] - 1 - point[1]
+                    y = point[0]
+                    # if y > 256 or x > 256:
+                    # __import__('ipdb').set_trace()
+                    new_heatmap_points.append([x, y])
+                points_rotated[new_junction_type] = new_heatmap_points
+            heatmap_points = points_rotated
+        sample = {"image": fplan, "label": segmentation, "scale": scale, "heatmaps": heatmap_points}
+        return sample
+    def furu(self, sample):
+        fplan = sample["image"]
+        segmentation = sample["label"]
+        heatmap_points = sample["heatmap_points"]
+        num_of_rotations = int(torch.randint(0, 3, (1,)))
+        for i in range(num_of_rotations):
+            fplan = fplan.transpose(2, 1).flip(2)
+            segmentation = segmentation.transpose(2, 1).flip(2)
+            hmapp_convert_map = {
+                0: 1,
+                1: 2,
+                2: 3,
+                3: 0,
+                4: 5,
+                5: 6,
+                6: 7,
+                7: 4,
+                8: 9,
+                9: 10,
+                10: 11,
+                11: 8,
+                12: 12,
+                13: 15,
+                14: 16,
+                15: 14,
+                16: 13,
+                17: 18,
+                18: 20,
+                19: 17,
+                20: 19,
+            }
+            points_rotated = dict()
+            for junction_type, points in heatmap_points.items():
+                new_junction_type = hmapp_convert_map[junction_type]
+                new_heatmap_points = []
+                for point in points:
+                    new_heatmap_points.append([fplan.shape[1] - 1 - point[1], point[0]])
+                points_rotated[new_junction_type] = new_heatmap_points
+            heatmap_points = points_rotated
+        sample = {"image": fplan, "label": segmentation, "heatmap_points": heatmap_points}
+        return sample
+def clip_heatmaps(heatmaps, minx, maxx, miny, maxy):
+    def clip(p):
+        return p[0] < maxx and p[0] >= minx and p[1] < maxy and p[1] >= miny
+    res = {}
+    for key, value in heatmaps.items():
+        res[key] = list(filter(clip, value))
+        for i, e in enumerate(res[key]):
+            res[key][i] = (e[0] - minx, e[1] - miny)
+    return res
+class DictToTensor(object):
+    def __init__(self, data_format="cubi"):
+        if data_format == "cubi":
+            self.augment = self.cubi
+        elif data_format == "furukawa":
+            self.augment = self.furukawa
+    def __call__(self, sample):
+        return self.augment(sample)
+    def cubi(self, sample):
+        image, label = sample["image"], sample["label"]
+        _, height, width = label.shape
+        heatmaps = sample["heatmaps"]
+        scale = sample["scale"]
+        heatmap_tensor = np.zeros((21, height, width))
+        for channel, coords in heatmaps.items():
+            for x, y in coords:
+                if x >= width:
+                    x -= 1
+                if y >= height:
+                    y -= 1
+                heatmap_tensor[int(channel), int(y), int(x)] = 1
+        # Gaussian filter
+        kernel = svg_utils.get_gaussian2D(int(30 * scale))
+        for i, h in enumerate(heatmap_tensor):
+            heatmap_tensor[i] = cv2.filter2D(h, -1, kernel)
+        heatmap_tensor = torch.FloatTensor(heatmap_tensor)
+        label = torch.cat((heatmap_tensor, label), 0)
+        return {"image": image, "label": label}
+    def furukawa(self, sample):
+        image, label = sample["image"], sample["label"]
+        _, height, width = label.shape
+        heatmap_points = sample["heatmap_points"]
+        heatmap_tensor = np.zeros((21, height, width))
+        for channel, coords in heatmap_points.items():
+            for x, y in coords:
+                heatmap_tensor[int(channel), int(y), int(x)] = 1
+        # Gaussian filter
+        kernel = svg_utils.get_gaussian2D(13)
+        for i, h in enumerate(heatmap_tensor):
+            heatmap_tensor[i] = cv2.filter2D(h, -1, kernel, borderType=cv2.BORDER_CONSTANT, delta=0)
+        heatmap_tensor = torch.FloatTensor(heatmap_tensor)
+        label = torch.cat((heatmap_tensor, label), 0)
+        return {"image": image, "label": label}
+class RotateNTurns(object):
+    def rot_tensor(self, t, n):
+        # One turn clock wise
+        if n == 1:
+            t = t.flip(2).transpose(3, 2)
+        # One turn counter clock wise
+        elif n == -1:
+            t = t.transpose(3, 2).flip(2)
+        # Two turns clock wise
+        elif n == 2:
+            t = t.flip(2).flip(3)
+        return t
+    def rot_points(self, t, n):
+        # Swapping corner ts
+        t_sorted = t.clone().detach()
+        # One turn clock wise
+        if n == 1:
+            # I junctions
+            t_sorted[:, 1] = t[:, 0]
+            t_sorted[:, 2] = t[:, 1]
+            t_sorted[:, 3] = t[:, 2]
+            t_sorted[:, 0] = t[:, 3]
+            # L junctions
+            t_sorted[:, 5] = t[:, 4]
+            t_sorted[:, 6] = t[:, 5]
+            t_sorted[:, 7] = t[:, 6]
+            t_sorted[:, 4] = t[:, 7]
+            # T junctions
+            t_sorted[:, 9] = t[:, 8]
+            t_sorted[:, 10] = t[:, 9]
+            t_sorted[:, 11] = t[:, 10]
+            t_sorted[:, 8] = t[:, 11]
+            # Opening corners
+            t_sorted[:, 15] = t[:, 13]
+            t_sorted[:, 16] = t[:, 14]
+            t_sorted[:, 14] = t[:, 15]
+            t_sorted[:, 13] = t[:, 16]
+            # Icon corners
+            t_sorted[:, 18] = t[:, 17]
+            t_sorted[:, 20] = t[:, 18]
+            t_sorted[:, 17] = t[:, 19]
+            t_sorted[:, 19] = t[:, 20]
+        # One turn counter clock wise
+        elif n == -1:
+            # I junctions
+            t_sorted[:, 3] = t[:, 0]
+            t_sorted[:, 0] = t[:, 1]
+            t_sorted[:, 1] = t[:, 2]
+            t_sorted[:, 2] = t[:, 3]
+            # L junctions
+            t_sorted[:, 7] = t[:, 4]
+            t_sorted[:, 4] = t[:, 5]
+            t_sorted[:, 5] = t[:, 6]
+            t_sorted[:, 6] = t[:, 7]
+            # T junctions
+            t_sorted[:, 11] = t[:, 8]
+            t_sorted[:, 8] = t[:, 9]
+            t_sorted[:, 9] = t[:, 10]
+            t_sorted[:, 10] = t[:, 11]
+            # Opening corners
+            t_sorted[:, 16] = t[:, 13]
+            t_sorted[:, 15] = t[:, 14]
+            t_sorted[:, 13] = t[:, 15]
+            t_sorted[:, 14] = t[:, 16]
+            # Icon corners
+            t_sorted[:, 19] = t[:, 17]
+            t_sorted[:, 17] = t[:, 18]
+            t_sorted[:, 20] = t[:, 19]
+            t_sorted[:, 18] = t[:, 20]
+        # Two turns clock wise
+        elif n == 2:
+            t_sorted = t.clone().detach()
+            # I junctions
+            t_sorted[:, 2] = t[:, 0]
+            t_sorted[:, 3] = t[:, 1]
+            t_sorted[:, 0] = t[:, 2]
+            t_sorted[:, 4] = t[:, 3]
+            # L junctions
+            t_sorted[:, 6] = t[:, 4]
+            t_sorted[:, 7] = t[:, 5]
+            t_sorted[:, 4] = t[:, 6]
+            t_sorted[:, 5] = t[:, 7]
+            # T junctions
+            t_sorted[:, 10] = t[:, 8]
+            t_sorted[:, 11] = t[:, 9]
+            t_sorted[:, 8] = t[:, 10]
+            t_sorted[:, 9] = t[:, 11]
+            # Opening corners
+            t_sorted[:, 14] = t[:, 13]
+            t_sorted[:, 13] = t[:, 14]
+            t_sorted[:, 16] = t[:, 15]
+            t_sorted[:, 15] = t[:, 16]
+            # Icon corners
+            t_sorted[:, 20] = t[:, 17]
+            t_sorted[:, 19] = t[:, 18]
+            t_sorted[:, 18] = t[:, 19]
+            t_sorted[:, 17] = t[:, 20]
+        elif n == 0:
+            return t_sorted
+        return t_sorted
+    def __call__(self, sample, data_type, n):
+        if data_type == "tensor":
+            return self.rot_tensor(sample, n)
+        elif data_type == "points":
+            return self.rot_points(sample, n)
+class RandomCropToSizeTorch(object):
+    def __init__(
+        self,
+        input_slice=[21, 1, 1],
+        size=(256, 256),
+        fill=(0, 0),
+        data_format="tensor",
+        dtype=torch.float32,
+        max_size=None,
+    ):
+        self.size = size
+        self.width = size[0]
+        self.height = size[1]
+        self.dtype = dtype
+        self.fill = fill
+        self.max_size = max_size
+        self.input_slice = input_slice
+        if data_format == "dict":
+            self.augment = self.augment_dict
+        elif data_format == "tensor":
+            self.augment = self.augment_tesor
+        elif data_format == "dict furu":
+            self.augment = self.augment_dict_furu
+    def __call__(self, sample):
+        return self.augment(sample)
+    def augment_tesor(self, sample):
+        image, label = sample["image"], sample["label"]
+        img_w = image.shape[2]
+        img_h = image.shape[1]
+        pad_w = int(self.width / 2)
+        pad_h = int(self.height / 2)
+        new_w = self.width + max(img_w, self.width)
+        new_h = self.height + max(img_h, self.height)
+        new_image = torch.zeros([image.shape[0], new_h, new_w], dtype=self.dtype)
+        new_image[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = image
+        new_heatmaps = torch.zeros([self.input_slice[0], new_h, new_w], dtype=self.dtype)
+        new_heatmaps[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = label[: self.input_slice[0]]
+        new_rooms = torch.full((self.input_slice[1], new_h, new_w), self.fill[0])
+        new_rooms[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = label[self.input_slice[0]]
+        new_icons = torch.full((self.input_slice[2], new_h, new_w), self.fill[1])
+        new_icons[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = label[self.input_slice[0] + self.input_slice[1]]
+        label = torch.cat((new_heatmaps, new_rooms, new_icons), 0)
+        image = new_image
+        removed_up = random.randint(0, new_h - self.width)
+        removed_left = random.randint(0, new_w - self.height)
+        removed_down = new_h - self.height - removed_up
+        removed_right = new_w - self.width - removed_left
+        if removed_down == 0 and removed_right == 0:
+            image = image[:, removed_up:, removed_left:]
+            label = label[:, removed_up:, removed_left:]
+        elif removed_down == 0:
+            image = image[:, removed_up:, removed_left:-removed_right]
+            label = label[:, removed_up:, removed_left:-removed_right]
+        elif removed_right == 0:
+            image = image[:, removed_up:-removed_down, removed_left:]
+            label = label[:, removed_up:-removed_down, removed_left:]
+        else:
+            image = image[:, removed_up:-removed_down, removed_left:-removed_right]
+            label = label[:, removed_up:-removed_down, removed_left:-removed_right]
+        return {"image": image, "label": label}
+    def augment_dict(self, sample):
+        image, label = sample["image"], sample["label"]
+        heatmap_points = sample["heatmaps"]
+        img_w = image.shape[2]
+        img_h = image.shape[1]
+        pad_w = int(self.width / 2)
+        pad_h = int(self.height / 2)
+        new_w = self.width + img_w
+        new_h = self.height + img_h
+        new_image = torch.full([image.shape[0], new_h, new_w], 255)
+        new_image[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = image
+        new_rooms = torch.full((1, new_h, new_w), self.fill[0])
+        new_rooms[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = label[0]
+        new_icons = torch.full((1, new_h, new_w), self.fill[1])
+        new_icons[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = label[1]
+        label = torch.cat((new_rooms, new_icons), 0)
+        image = new_image
+        removed_up = random.randint(0, new_h - self.width)
+        removed_left = random.randint(0, new_w - self.height)
+        removed_down = new_h - self.height - removed_up
+        removed_right = new_w - self.width - removed_left
+        new_heatmap_points = dict()
+        for junction_type, points in heatmap_points.items():
+            new_heatmap_points_per_type = []
+            for point in points:
+                new_heatmap_points_per_type.append([point[0] + pad_w, point[1] + pad_h])
+                new_heatmap_points[junction_type] = new_heatmap_points_per_type
+        heatmap_points = new_heatmap_points
+        if removed_down == 0 and removed_right == 0:
+            image = image[:, removed_up:, removed_left:]
+            label = label[:, removed_up:, removed_left:]
+            heatmap_points = clip_heatmaps(heatmap_points, removed_left, inf, removed_up, inf)
+        elif removed_down == 0:
+            image = image[:, removed_up:, removed_left:-removed_right]
+            label = label[:, removed_up:, removed_left:-removed_right]
+            heatmap_points = clip_heatmaps(heatmap_points, removed_left, removed_left + self.width, removed_up, inf)
+        elif removed_right == 0:
+            image = image[:, removed_up:-removed_down, removed_left:]
+            label = label[:, removed_up:-removed_down, removed_left:]
+            heatmap_points = clip_heatmaps(heatmap_points, removed_left, inf, removed_up, removed_up + self.width)
+        else:
+            image = image[:, removed_up:-removed_down, removed_left:-removed_right]
+            label = label[:, removed_up:-removed_down, removed_left:-removed_right]
+            heatmap_points = clip_heatmaps(
+                heatmap_points, removed_left, removed_left + self.width, removed_up, removed_up + self.height
+            )
+        return {"image": image, "label": label, "heatmaps": heatmap_points, "scale": sample["scale"]}
+    def augment_dict_furu(self, sample):
+        image, label = sample["image"], sample["label"]
+        heatmap_points = sample["heatmap_points"]
+        img_w = image.shape[2]
+        img_h = image.shape[1]
+        pad_w = int(self.width / 2)
+        pad_h = int(self.height / 2)
+        new_w = self.width + img_w
+        new_h = self.height + img_h
+        new_image = torch.full([image.shape[0], new_h, new_w], 255)
+        new_image[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = image
+        new_rooms = torch.full((1, new_h, new_w), self.fill[0])
+        new_rooms[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = label[0]
+        new_icons = torch.full((1, new_h, new_w), self.fill[1])
+        new_icons[:, pad_h : img_h + pad_h, pad_w : img_w + pad_w] = label[1]
+        label = torch.cat((new_rooms, new_icons), 0)
+        image = new_image
+        removed_up = random.randint(0, new_h - self.width)
+        removed_left = random.randint(0, new_w - self.height)
+        removed_down = new_h - self.height - removed_up
+        removed_right = new_w - self.width - removed_left
+        new_heatmap_points = dict()
+        for junction_type, points in heatmap_points.items():
+            new_heatmap_points_per_type = []
+            for point in points:
+                new_heatmap_points_per_type.append([point[0] + pad_w, point[1] + pad_h])
+                new_heatmap_points[junction_type] = new_heatmap_points_per_type
+        heatmap_points = new_heatmap_points
+        if removed_down == 0 and removed_right == 0:
+            image = image[:, removed_up:, removed_left:]
+            label = label[:, removed_up:, removed_left:]
+            heatmap_points = clip_heatmaps(heatmap_points, removed_left, inf, removed_up, inf)
+        elif removed_down == 0:
+            image = image[:, removed_up:, removed_left:-removed_right]
+            label = label[:, removed_up:, removed_left:-removed_right]
+            heatmap_points = clip_heatmaps(heatmap_points, removed_left, removed_left + self.width, removed_up, inf)
+        elif removed_right == 0:
+            image = image[:, removed_up:-removed_down, removed_left:]
+            label = label[:, removed_up:-removed_down, removed_left:]
+            heatmap_points = clip_heatmaps(heatmap_points, removed_left, inf, removed_up, removed_up + self.width)
+        else:
+            image = image[:, removed_up:-removed_down, removed_left:-removed_right]
+            label = label[:, removed_up:-removed_down, removed_left:-removed_right]
+            heatmap_points = clip_heatmaps(
+                heatmap_points, removed_left, removed_left + self.width, removed_up, removed_up + self.height
+            )
+        return {"image": image, "label": label, "heatmap_points": heatmap_points}
+class ColorJitterTorch(object):
+    def __init__(self, b_var=0.4, c_var=0.4, s_var=0.4, dtype=torch.float32, version="dict"):
+        self.b_var = b_var
+        self.c_var = c_var
+        self.s_var = s_var
+        self.dtype = dtype
+        self.version = version
+    def __call__(self, sample):
+        res = sample
+        image = sample["image"]
+        image = self.brightness(image, self.b_var)
+        image = self.contrast(image, self.c_var)
+        image = self.saturation(image, self.s_var)
+        res["image"] = image
+        return res
+    def blend(self, img_1, img_2, var):
+        m = torch.tensor([0], dtype=self.dtype).uniform_(-var, var)
+        alpha = 1 + m
+        res = img_1 * alpha + (1 - alpha) * img_2
+        res = torch.clamp(res, min=0, max=255)
+        return res
+    def grayscale(self, img):
+        red = img[0] * 0.299
+        green = img[1] * 0.587
+        blue = img[2] * 0.114
+        gray = red + green + blue
+        gray = torch.clamp(gray, min=0, max=255)
+        res = torch.stack((gray, gray, gray), dim=0)
+        return res
+    def saturation(self, img, var):
+        res = self.grayscale(img)
+        res = self.blend(img, res, var)
+        return res
+    def brightness(self, img, var):
+        res = torch.zeros(img.shape)
+        res = self.blend(img, res, var)
+        return res
+    def contrast(self, img, var):
+        res = self.grayscale(img)
+        mean_color = res.mean()
+        res = torch.full(res.shape, mean_color)
+        res = self.blend(img, res, var)
+        return res
+class ResizePaddedTorch(object):
+    def __init__(self, fill, size=(256, 256), both=True, dtype=torch.float32, data_format="tensor"):
+        self.size = size
+        self.width = size[0]
+        self.height = size[1]
+        self.both = both
+        self.dtype = dtype
+        self.fill = fill
+        self.fill_cval = 255
+        if data_format == "tensor":
+            self.augment = self.augment_tensor
+        elif data_format == "dict furu":
+            self.augment = self.augment_dict_furu
+        elif data_format == "dict":
+            self.augment = self.augment_dict
+            self.fill_cval = 1
+    def __call__(self, sample):
+        # image 1: Bi-cubic interpolation as in original paper
+        image, _, _, _ = self.resize_padded(
+            sample["image"], self.size, fill_cval=self.fill_cval, image=True, mode="bilinear", aling_corners=False
+        )
+        sample["image"] = image
+        return self.augment(sample)
+    def augment_tensor(self, sample):
+        image, label = sample["image"], sample["label"]
+        if self.both:
+            # labels 0: Nearest-neighbor interpolation
+            heatmaps, _, _, _ = self.resize_padded(label[:21], self.size, mode="bilinear", aling_corners=False)
+            rooms_padded, _, _, _ = self.resize_padded(label[[21]], self.size, mode="nearest", fill_cval=self.fill[0])
+            icons_padded, _, _, _ = self.resize_padded(
+                label[[22]],
+                self.size,
+                mode="nearest",
+                fill_cval=self.fill[1],
+            )
+            label = torch.cat((heatmaps, rooms_padded, icons_padded), dim=0)
+        return {"image": image, "label": label}
+    def augment_dict_furu(self, sample):
+        image, label = sample["image"], sample["label"]
+        heatmap_points = sample["heatmap_points"]
+        rooms_padded, _, _, _ = self.resize_padded(label[[0]], self.size, mode="nearest", fill_cval=self.fill[0])
+        icons_padded, ratio, y_pad, x_pad = self.resize_padded(
+            label[[1]], self.size, mode="nearest", fill_cval=self.fill[1]
+        )
+        label = torch.cat((rooms_padded, icons_padded), dim=0)
+        new_heatmap_points = dict()
+        for junction_type, points in heatmap_points.items():
+            new_heatmap_points_per_type = []
+            for point in points:
+                # Indexing starts from 0 but when we multiply with the ratio we need to start from 0.
+                new_x = point[0] * ratio + x_pad
+                new_y = point[1] * ratio + y_pad
+                new_heatmap_points_per_type.append([new_x, new_y])
+                new_heatmap_points[junction_type] = new_heatmap_points_per_type
+        heatmap_points = new_heatmap_points
+        return {"image": image, "label": label, "heatmap_points": heatmap_points}
+    def augment_dict(self, sample):
+        image, label = sample["image"], sample["label"]
+        heatmap_points = sample["heatmaps"]
+        scale = sample["scale"]
+        rooms_padded, _, _, _ = self.resize_padded(label[[0]], self.size, mode="nearest", fill_cval=self.fill[0])
+        icons_padded, ratio, y_pad, x_pad = self.resize_padded(
+            label[[1]], self.size, mode="nearest", fill_cval=self.fill[1]
+        )
+        label = torch.cat((rooms_padded, icons_padded), dim=0)
+        new_heatmap_points = dict()
+        for junction_type, points in heatmap_points.items():
+            new_heatmap_points_per_type = []
+            for point in points:
+                # Indexing starts from 0 but when we multiply with the ratio we need to start from 0.
+                new_x = point[0] * ratio + x_pad
+                new_y = point[1] * ratio + y_pad
+                if new_y < 256 and new_x < 256 and new_y >= 0 and new_x >= 0:
+                    # __import__('ipdb').set_trace()
+                    new_heatmap_points_per_type.append([new_x, new_y])
+                    new_heatmap_points[junction_type] = new_heatmap_points_per_type
+        heatmap_points = new_heatmap_points
+        return {"image": image, "label": label, "heatmaps": heatmap_points, "scale": scale}
+    def resize_padded(self, img, new_shape, image=False, fill_cval=0, mode="nearest", aling_corners=None):
+        new_shape = torch.tensor([img.shape[0], new_shape[0], new_shape[1]], dtype=self.dtype)
+        old_shape = torch.tensor(img.shape, dtype=self.dtype)
+        ratio = (new_shape / old_shape).min()
+        img_s = torch.tensor(img.shape[1:], dtype=self.dtype)
+        interm_shape = (ratio * img_s).ceil()
+        interm_shape = [interm_shape[0], interm_shape[1]]
+        img = img.unsqueeze(0)
+        interm_img = torch.nn.functional.interpolate(img, size=interm_shape, mode=mode, align_corners=aling_corners)
+        interm_img = interm_img.squeeze(0)
+        a = (interm_img.shape[0], self.size[0], self.size[1])
+        new_img = torch.full(a, fill_cval)
+        x_pad = int((self.width - interm_img.shape[1]) / 2)
+        y_pad = int((self.height - interm_img.shape[2]) / 2)
+        new_img[:, x_pad : interm_img.shape[1] + x_pad, y_pad : interm_img.shape[2] + y_pad] = interm_img
+        return new_img, ratio, x_pad, y_pad

data_preprocess/cubicasa5k/combine_json.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import glob
+import json
+import os
+import shutil
+from pathlib import Path
+def combine_json_files(input_pattern, data_path, split_type, output_file, start_image_id=0):
+    """
+    Combines multiple COCO-style JSON annotation files into a single file.
+    Args:
+        input_pattern: Glob pattern to match the input JSON files (e.g., "annotations/*.json")
+        output_file: Path to the output combined JSON file
+    """
+    # Initialize combined data structure
+    combined_data = {"images": [], "annotations": [], "categories": []}
+    # Track image and annotation IDs to avoid duplicates
+    annotation_ids_seen = set()
+    next_image_id = start_image_id
+    next_annotation_id = 0
+    skip_file_list = []
+    image_id_mapping = {}
+    # Find all matching JSON files
+    json_files = sorted(glob.glob(input_pattern))
+    print(f"Found {len(json_files)} JSON files to combine")
+    # Process each file
+    for i, json_file in enumerate(json_files):
+        print(f"Processing file {i + 1}/{len(json_files)}: {json_file}")
+        with open(json_file, "r") as f:
+            data = json.load(f)
+        # Store categories from the first file
+        if i == 0 and data.get("categories"):
+            combined_data["categories"] = data["categories"]
+        # empty annos
+        if len(data["annotations"]) == 0:
+            skip_file_list.append(data["images"][0]["id"])
+            continue
+        # Process images
+        for image in data.get("images", []):
+            if image["id"] not in image_id_mapping:
+                image_id_mapping[image["id"]] = next_image_id
+            else:
+                skip_file_list.append(image["id"])
+                continue
+            image["id"] = next_image_id
+            next_image_id += 1
+            image["file_name"] = str(image["id"]).zfill(5) + ".png"
+            org_file_name = os.path.basename(json_file).replace(".json", ".png")
+            if image["file_name"] != org_file_name and os.path.exists(f"{data_path}/{split_type}/{org_file_name}"):
+                shutil.move(
+                    f"{data_path}/{split_type}/{org_file_name}", f"{data_path}/{split_type}/{image['file_name']}"
+                )
+            combined_data["images"].append(image)
+        # Process annotations
+        for annotation in data.get("annotations", []):
+            annotation["id"] = next_annotation_id
+            next_annotation_id += 1
+            annotation["image_id"] = image_id_mapping[annotation["image_id"]]
+            annotation_ids_seen.add(annotation["id"])
+            combined_data["annotations"].append(annotation)
+    # Write combined data to output file
+    output_path = Path(output_file)
+    output_path.parent.mkdir(exist_ok=True, parents=True)
+    with open(output_file, "w") as f:
+        json.dump(combined_data, f, indent=2)
+    with open(output_path.parent / f"{output_path.name.split('.')[0]}_image_id_mapping.json", "w") as f:
+        json.dump(image_id_mapping, f, indent=2)
+    if len(skip_file_list):
+        with open(output_path.parent / f"{output_path.name.split('.')[0]}_skipped.txt", "w") as f:
+            f.write("\n".join([str(x) for x in skip_file_list]))
+    print(f"Combined data written to {output_file}")
+    print(f"Total images: {len(combined_data['images'])}")
+    print(f"Total annotations: {len(combined_data['annotations'])}")
+    print(f"Total categories: {len(combined_data['categories'])}")
+    print(f"Skipped images: {len(skip_file_list)}")
+    return combined_data
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Combine multiple COCO-style JSON annotation files")
+    parser.add_argument("--input", required=True, help="Glob pattern for input JSON files, e.g., 'annotations/*.json'")
+    parser.add_argument("--output", required=True, help="Output JSON file path")
+    args = parser.parse_args()
+    splits = ["train", "val", "test"]
+    for i, split in enumerate(splits):
+        if split == "train":
+            start_image_id = 0
+        else:
+            start_image_id += len(list(Path(f"{args.input}/{splits[i - 1]}").glob("*.png")))
+        combine_json_files(
+            f"{args.input}/annotations_json/{split}/*.json",
+            args.input,
+            split,
+            f"{args.output}/{split}.json",
+            start_image_id=start_image_id,
+        )

data_preprocess/cubicasa5k/create_coco_cc5k.py ADDED Viewed

	@@ -0,0 +1,672 @@

+import argparse
+import json
+import os
+import sys
+from multiprocessing import Pool
+from pathlib import Path
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+from loaders import FloorplanSVG
+from matplotlib.patches import Patch
+from PIL import Image
+from shapely.geometry import Polygon
+from skimage import measure
+from tqdm import tqdm
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
+sys.path.append(str(Path(__file__).resolve().parent.parent))
+from common_utils import resort_corners
+from stru3d.stru3d_utils import type2id
+#### ORIGINAL ROOM NAMES & ICON_NAMES ####
+# Room label id -> display name. The values are passed as the legend names
+# when the room mask (mask[0]) is visualized in process_floorplan.
+ROOM_NAMES = {
+    0: "Background",
+    1: "Outdoor",
+    2: "Wall",
+    3: "Kitchen",
+    4: "Living Room",
+    5: "Bed Room",
+    6: "Bath",
+    7: "Entry",
+    8: "Railing",
+    9: "Storage",
+    10: "Garage",
+    11: "Undefined",
+}
+# Icon label id -> display name. The values are passed as the legend names
+# when the icon mask (mask[1]) is visualized in process_floorplan, which
+# keeps only Window (1) and Door (2) and filters out ids 0 and 3-10.
+ICON_NAMES = {
+    0: "No Icon",
+    1: "Window",
+    2: "Door",
+    3: "Closet",
+    4: "Electrical Applience",
+    5: "Toilet",
+    6: "Sink",
+    7: "Sauna Bench",
+    8: "Fire Place",
+    9: "Bathtub",
+    10: "Chimney",
+}
+# Combined CubiCasa5k id -> class index of CLASS_MAPPING. Ids 0-11 are the
+# room ids of ROOM_NAMES; icon ids are shifted by 11 when rooms and icons are
+# merged, so 12 is Window and 13 is Door. Polygons whose id maps to None are
+# dropped in process_floorplan.
+CC5K_2_S3D_MAPPING = {
+    0: None,  # "Background"
+    1: type2id["balcony"],  # "Outdoor" -> balcony (4)
+    2: None,  # "Wall" has no direct match
+    3: type2id["kitchen"],  # Kitchen -> kitchen (1)
+    4: type2id["living room"],  # Living Room -> living room (0)
+    5: type2id["bedroom"],  # Bed Room -> bedroom (2)
+    6: type2id["bathroom"],  # Bath -> bathroom (3)
+    7: 18,  # Entry -> "entry" (18) in CLASS_MAPPING
+    8: 19,  # Railing -> "railing" (19) in CLASS_MAPPING
+    9: type2id["store room"],  # Storage -> store room (9)
+    10: type2id["garage"],  # Garage -> garage (14)
+    11: type2id["undefined"],  # Undefined -> undefined (15)
+    12: type2id["window"],  # Window -> window (17)
+    13: type2id["door"],  # Door -> door (16)
+}
+# Combined CubiCasa5k id -> index of CC5K_CLASS_MAPPING (Railing is merged
+# into Wall). process_floorplan marks this table as the old one and uses
+# CC5K_MAPPING_2 instead.
+CC5K_MAPPING = {
+    0: None,
+    1: 0,  # Outdoor
+    2: 1,  # Wall
+    3: 2,  # Kitchen
+    4: 3,  # Living Room
+    5: 4,  # Bed Room
+    6: 5,  # Bath
+    7: 6,  # Entry
+    8: 1,  # Railing -> Wall
+    9: 7,  # Storage
+    10: 8,  # Garage
+    11: 9,  # Undefined
+    12: 10,  # Window
+    13: 11,  # Door
+}
+# Combined CubiCasa5k id -> index of CC5K_CLASS_MAPPING_2. Background, Wall
+# and Railing map to None, i.e. their polygons are dropped in process_floorplan.
+CC5K_MAPPING_2 = {
+    0: None,
+    1: 0,  # Outdoor
+    2: None,  # Wall (dropped)
+    3: 1,  # Kitchen
+    4: 2,  # Living Room
+    5: 3,  # Bed Room
+    6: 4,  # Bath
+    7: 5,  # Entry
+    8: None,  # Railing (dropped)
+    9: 6,  # Storage
+    10: 7,  # Garage
+    11: 8,  # Undefined
+    12: 9,  # Window
+    13: 10,  # Door
+}
+# Class name -> class index matching the targets of CC5K_MAPPING.
+CC5K_CLASS_MAPPING = {
+    "Outdoor": 0,
+    "Wall, Railing": 1,
+    "Kitchen": 2,
+    "Living Room": 3,
+    "Bed Room": 4,
+    "Bath": 5,
+    "Entry": 6,
+    "Storage": 7,
+    "Garage": 8,
+    "Undefined": 9,
+    "Window": 10,
+    "Door": 11,
+}
+# Class name -> class index matching the targets of CC5K_MAPPING_2 (no wall
+# class). Used by process_floorplan when the original CubiCasa5k classes are
+# requested; Door is 10 and Window is 9 there.
+CC5K_CLASS_MAPPING_2 = {
+    "Outdoor": 0,
+    "Kitchen": 1,
+    "Living Room": 2,
+    "Bed Room": 3,
+    "Bath": 4,
+    "Entry": 5,
+    "Storage": 6,
+    "Garage": 7,
+    "Undefined": 8,
+    "Window": 9,
+    "Door": 10,
+}
+# Class name -> class index matching the targets of CC5K_2_S3D_MAPPING. Used by
+# process_floorplan by default; Door is 16 and Window is 17 there.
+CLASS_MAPPING = {
+    "living room": 0,
+    "kitchen": 1,
+    "bedroom": 2,
+    "bathroom": 3,
+    "balcony": 4,
+    "corridor": 5,
+    "dining room": 6,
+    "study": 7,
+    "studio": 8,
+    "store room": 9,
+    "garden": 10,
+    "laundry room": 11,
+    "office": 12,
+    "basement": 13,
+    "garage": 14,
+    "undefined": 15,
+    "door": 16,
+    "window": 17,
+    "entry": 18,
+    "railing": 19,
+}
+def fill_holes_in_mask(binary_mask):
+    """
+    Fill 0-pixels in a binary mask that are completely surrounded by 1-pixels.
+    Args:
+        binary_mask (numpy.ndarray): Binary mask with 0 and 1 values.
+    Returns:
+        numpy.ndarray: Binary mask with holes filled.
+    """
+    # Ensure the mask is binary (0 and 1)
+    binary_mask = (binary_mask > 0).astype(np.uint8)
+    # Apply dilation
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
+    binary_mask = cv2.dilate(binary_mask, kernel, iterations=1)
+    # Find contours in the mask
+    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # Fill the contours
+    filled_mask = binary_mask.copy()
+    cv2.fillPoly(filled_mask, contours, 1)
+    return filled_mask
+def close_contour(contour):
+    if not np.array_equal(contour[0], contour[-1]):
+        contour = np.vstack((contour, contour[0]))
+    return contour
+def binary_mask_to_polygon(binary_mask, tolerance=0):
+    """Converts a binary mask to COCO polygon representation
+    Ref: https://github.com/waspinator/pycococreator/blob/master/pycococreatortools/pycococreatortools.py
+    Args:
+        binary_mask: a 2D binary numpy array where '1's represent the object
+        tolerance: Maximum distance from original points of polygon to approximated
+            polygonal chain. If tolerance is 0, the original coordinate array is returned.
+    """
+    polygons = []
+    # pad mask to close contours of shapes which start and end at an edge
+    padded_binary_mask = np.pad(binary_mask, pad_width=1, mode="constant", constant_values=0)
+    contours = measure.find_contours(padded_binary_mask, 0.5)
+    contours = np.subtract(contours, 1)
+    for contour in contours:
+        contour = close_contour(contour)
+        contour = measure.approximate_polygon(contour, tolerance)
+        if len(contour) < 3:
+            continue
+        contour = np.flip(contour, axis=1)
+        segmentation = contour.ravel().tolist()
+        # after padding and subtracting 1 we may get -0.5 points in our segmentation
+        segmentation = [0 if i < 0 else i for i in segmentation]
+        polygons.append(segmentation)
+    return polygons
+def extract_icon_cv2(mask, start_cls_id=11, skip_classes=[]):
+    room_ids = np.unique(mask)
+    room_polygons = []
+    new_mask = np.zeros(mask.shape)
+    # window, door
+    for room_id in room_ids:
+        if room_id in skip_classes:
+            continue
+        true_room_id = int(room_id) + start_cls_id
+        # Create binary mask for this room
+        room_mask = (mask == room_id).astype(np.uint8)
+        new_mask = np.where(room_mask, true_room_id, 0)
+        # Find contours using OpenCV
+        contours, _ = cv2.findContours(room_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        if contours:
+            # # Get the largest contour
+            # largest_contour = max(contours, key=cv2.contourArea)
+            for cnt in contours:
+                polygon = [tuple(point[0]) for point in cnt]
+                if len(polygon) < 3:
+                    continue
+                poly = Polygon(polygon)
+                simplified_poly = poly.simplify(tolerance=0.5, preserve_topology=True)
+                simplified_poly = list(simplified_poly.exterior.coords)
+                room_polygons.append([simplified_poly, true_room_id])
+    return room_polygons, new_mask
+def visualize_room_polygons(mask, room_polygons, class_names, save_path="cubicasa_debug.png", bg_polygons=None):
+    """
+    Visualize the extracted room polygons.
+    Args:
+        mask: Original segmentation mask
+        room_polygons: Dictionary of room polygons as returned by extract_room_polygons
+        figsize: Figure size for the plot
+    """
+    # Set figure size to exactly 256x256 pixels
+    dpi = 100  # Standard screen DPI
+    figsize = (mask.shape[1] / dpi, mask.shape[0] / dpi)  # Convert pixels to inches
+    # Get unique classes from the mask
+    unique_classes = np.unique(mask)
+    # Create a discrete colormap
+    cmap = plt.cm.get_cmap("gist_ncar", 256)  # nipy_spectral
+    # cmap = ListedColormap([cmap(x) for x in np.linspace(0, 1, int(20))])
+    fig = plt.figure(figsize=figsize)
+    ax = fig.add_axes([0, 0, 1, 1])
+    plt.imshow(mask, cmap=cmap, interpolation="nearest", alpha=0.6, vmin=0, vmax=20)
+    # Plot each room polygon
+    for polygon, room_cls in room_polygons:
+        polygon_array = np.array(polygon).copy()
+        # # flip y
+        # polygon_array[:, 1] = mask.shape[0] - polygon_array[:, 1] - 1
+        ax.plot(polygon_array[:, 0], polygon_array[:, 1], "k-", linewidth=2)
+        # Add room ID label at the centroid
+        centroid_x = np.mean(polygon_array[:, 0])
+        centroid_y = np.mean(polygon_array[:, 1])
+        ax.text(
+            centroid_x,
+            centroid_y,
+            str(room_cls),
+            fontsize=12,
+            ha="center",
+            va="center",
+            bbox=dict(facecolor="white", alpha=0.7),
+        )
+    if bg_polygons is not None:
+        # Plot each room polygon
+        for polygon, room_cls in bg_polygons:
+            polygon_array = np.array(polygon).copy()
+            # # flip y
+            # polygon_array[:, 1] = mask.shape[0] - polygon_array[:, 1] - 1
+            ax.plot(polygon_array[:, 0], polygon_array[:, 1], "c-", linewidth=2)
+    # Create custom legend elements
+    legend_elements = []
+    norm = np.linspace(0, 1, 21)  # int(max(unique_classes))+1
+    for i, cls in enumerate(sorted(unique_classes)):
+        # if int(cls) == 0:
+        #     continue
+        # Get the exact same color that imshow uses
+        color = cmap(norm[int(cls)])
+        # color = cmap(int(cls))
+        cls_name = f"{int(cls)}_{class_names[int(cls)]}"
+        # You can replace f"Class {cls}" with your actual class names if available
+        legend_elements.append(Patch(facecolor=color, edgecolor="black", label=f"{cls_name}", alpha=0.6))
+    # Add the legend to the plot
+    ax.legend(
+        handles=legend_elements,
+        loc="best",
+        title="Classes",
+        fontsize=20,
+        markerscale=4,
+        title_fontsize=28,
+    )
+    # plt.title('Room Polygons Extracted from Segmentation Mask')
+    plt.axis("equal")
+    plt.axis("off")
+    fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
+    plt.close()
+def config():
+    a = argparse.ArgumentParser(description="Generate coco format data for Structured3D")
+    a.add_argument(
+        "--data_root", default="Structured3D_panorama", type=str, help="path to raw Structured3D_panorama folder"
+    )
+    a.add_argument("--output", default="coco_cubicasa5k", type=str, help="path to output folder")
+    a.add_argument("--disable_wd2line", action="store_true")
+    args = a.parse_args()
+    return args
+def save_image(image_path, output_path, mask=None):
+    """
+    ref: https://github.com/ultralytics/ultralytics/issues/339
+    """
+    img = Image.open(image_path).convert("RGB")
+    img.info.pop("icc_profile", None)
+    if mask is not None:
+        img_array = np.array(img)
+        if len(mask.shape) == 2 and len(img_array.shape) == 3:
+            mask = mask[:, :, np.newaxis]
+        masked_img = np.where(mask == 0, 255, img_array)
+        img = Image.fromarray(masked_img.astype(np.uint8))
+    img.save(output_path)
+def remove_polygons_by_type(polygons, skip_types=[]):
+    new_room_polygons = []
+    for polygon, poly_type in polygons:
+        if poly_type in skip_types:
+            continue
+        new_room_polygons.append([polygon, poly_type])
+    return new_room_polygons
+def merge_rooms_and_icons(room_polygons, icon_polygons):
+    new_icon_polygons = []
+    for poly, poly_type in icon_polygons:
+        new_icon_polygons.append([poly, poly_type + 11])
+    return room_polygons + new_icon_polygons
+def create_coco_bounding_box(bb_x, bb_y, image_width, image_height, bound_pad=2):
+    bb_x = np.unique(bb_x)
+    bb_y = np.unique(bb_y)
+    bb_x_min = np.maximum(np.min(bb_x) - bound_pad, 0)
+    bb_y_min = np.maximum(np.min(bb_y) - bound_pad, 0)
+    bb_x_max = np.minimum(np.max(bb_x) + bound_pad, image_width - 1)
+    bb_y_max = np.minimum(np.max(bb_y) + bound_pad, image_height - 1)
+    bb_width = bb_x_max - bb_x_min
+    bb_height = bb_y_max - bb_y_min
+    coco_bb = [bb_x_min, bb_y_min, bb_width, bb_height]
+    return coco_bb
+def process_floorplan(
+    image_set,
+    scene_id,
+    start_scene_id,
+    args,
+    save_dir,
+    annos_folder,
+    use_org_cc5k_classs=False,
+    vis_fp=False,
+    wd2line=False,
+):
+    if use_org_cc5k_classs:
+        class_mapping_dict = CC5K_MAPPING_2  # old: CC5K_MAPPING
+        class_to_index_dict = CC5K_CLASS_MAPPING_2
+        door_window_index = [10, 9]
+    else:
+        class_mapping_dict = CC5K_2_S3D_MAPPING
+        class_to_index_dict = CLASS_MAPPING
+        door_window_index = [16, 17]
+    mask = image_set["label"].numpy()
+    room_polygons = [[poly, poly_type] for poly, poly_type in zip(image_set["room_polygon"], image_set["room_type"])]
+    icon_polygons = [[poly, poly_type] for poly, poly_type in zip(image_set["icon_polygon"], image_set["icon_type"])]
+    image_height, image_width = mask.shape[1:]
+    coco_annotation_dict_list = []
+    # for storing
+    save_dict = prepare_dict(class_to_index_dict)  # old: CC5K_CLASS_MAPPING
+    instance_id = 0
+    img_id = int(scene_id) + start_scene_id
+    img_dict = {}
+    img_dict["file_name"] = str(img_id).zfill(5) + ".png"
+    img_dict["id"] = img_id
+    img_dict["width"] = image_width
+    img_dict["height"] = image_height
+    if vis_fp:
+        os.makedirs(save_dir.rstrip("/") + "_aux", exist_ok=True)
+        visualize_room_polygons(
+            mask[0],
+            room_polygons,
+            list(ROOM_NAMES.values()),
+            save_path=f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5)}_room.png",
+        )
+        visualize_room_polygons(
+            mask[1],
+            icon_polygons,
+            list(ICON_NAMES.values()),
+            bg_polygons=room_polygons,
+            save_path=f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5)}_icon.png",
+        )
+    #### FILTER NON-USE TYPES
+    # DROP BG
+    room_skip_types = [0]
+    filtered_room_polygons = remove_polygons_by_type(room_polygons, skip_types=room_skip_types)
+    # visualize_room_polygons(mask[0], filtered_room_polygons, list(ROOM_NAMES.values()),
+    #                         save_path=f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5)}_room_filtered.png")
+    # Exclude all furnitures, excepts window, door
+    icon_skip_types = [0, *list(range(3, 11))]
+    filtered_icon_polygons = remove_polygons_by_type(icon_polygons, skip_types=icon_skip_types)
+    # visualize_room_polygons(mask[1], filtered_icon_polygons, list(ICON_NAMES.values()),
+    #                         bg_polygons=room_polygons, save_path=f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5)}_icon_filtered.png")
+    #### COMBINED
+    combined_polygons = merge_rooms_and_icons(filtered_room_polygons, filtered_icon_polygons)
+    filtered_mask1 = mask[0].copy()
+    filtered_mask1[np.isin(mask[0], room_skip_types)] = 0
+    filtered_mask2 = mask[1].copy()
+    filtered_mask2[np.isin(mask[1], icon_skip_types)] = 0
+    filtered_mask2[filtered_mask2 != 0] += 11
+    filtered_mask = np.where(filtered_mask2 != 0, filtered_mask2, filtered_mask1)
+    new_filtered_mask = filtered_mask.copy()
+    for src_type, dest_type in class_mapping_dict.items():
+        if dest_type is None:
+            continue
+        new_filtered_mask[filtered_mask == src_type] = dest_type + 1
+    # filtered_mask = new_filtered_mask
+    binary_mask = np.zeros_like(filtered_mask)
+    binary_mask = np.where((mask[0] + mask[1]) != 0, 1, 0).astype(np.uint8)
+    filled_mask = fill_holes_in_mask(binary_mask)
+    cv2.imwrite(
+        f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5) + '_mask.png'}", filled_mask.astype(np.uint8) * 255
+    )
+    # visualize_room_polygons(combined_mask, combined_polygons, list(ROOM_NAMES.values()) + list(ICON_NAMES.values()), save_path=f"{save_dir}/{str(img_id).zfill(5)}_combined.png")
+    save_image(
+        f"{args.data_root}/{image_set['folder']}/F1_scaled.png",
+        f"{save_dir}/{str(img_id).zfill(5) + '.png'}",
+        mask=filled_mask,
+    )
+    if vis_fp:
+        save_image(
+            f"{args.data_root}/{image_set['folder']}/F1_scaled.png",
+            f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5) + '_org.png'}",
+            mask=None,
+        )
+    output_polygon_list = []
+    combined_polygon_list = []
+    for poly_ind, (polygon, poly_type) in enumerate(combined_polygons):
+        poly_shapely = Polygon(polygon)
+        area = poly_shapely.area
+        org_poly_type = poly_type
+        poly_type = class_mapping_dict[poly_type]
+        if poly_type is None:
+            continue
+        if poly_type not in door_window_index and area < 100:
+            continue
+        if poly_type in door_window_index and area < 1:
+            continue
+        rectangle_shapely = poly_shapely.envelope
+        polygon = np.array(polygon)
+        ### here we convert door/window annotation into a single line
+        if poly_type in door_window_index and wd2line:
+            if polygon.shape[0] > 4:
+                if len(polygon) == 5 and (polygon[0] == polygon[-1]).all():
+                    polygon = polygon[:-1]  # drop last point since it is same as first
+                else:
+                    bounding_rect = np.array(poly_shapely.minimum_rotated_rectangle.exterior.coords)
+                    polygon = bounding_rect[:4]
+            assert polygon.shape[0] == 4
+            midp_1 = (polygon[0] + polygon[1]) / 2
+            midp_2 = (polygon[1] + polygon[2]) / 2
+            midp_3 = (polygon[2] + polygon[3]) / 2
+            midp_4 = (polygon[3] + polygon[0]) / 2
+            dist_1_3 = np.square(midp_1 - midp_3).sum()
+            dist_2_4 = np.square(midp_2 - midp_4).sum()
+            if dist_1_3 > dist_2_4:
+                polygon = np.row_stack([midp_1, midp_3])
+            else:
+                polygon = np.row_stack([midp_2, midp_4])
+        coco_seg_poly = []
+        poly_sorted = resort_corners(polygon)
+        for p in poly_sorted:
+            coco_seg_poly += list(p)
+        # Slightly wider bounding box
+        bb_x, bb_y = rectangle_shapely.exterior.xy
+        coco_bb = create_coco_bounding_box(bb_x, bb_y, image_width, image_height, bound_pad=2)
+        coco_annotation_dict = {
+            "segmentation": [coco_seg_poly],
+            "area": area,
+            "iscrowd": 0,
+            "image_id": img_id,
+            "bbox": coco_bb,
+            "category_id": poly_type,
+            "id": instance_id,
+        }
+        coco_annotation_dict_list.append(coco_annotation_dict)
+        instance_id += 1
+        combined_polygon_list.append([np.array(coco_seg_poly).reshape(-1, 2), org_poly_type])
+        output_polygon_list.append([np.array(coco_seg_poly).reshape(-1, 2), poly_type + 1])
+    #### end split_file loop
+    save_dict["images"].append(img_dict)
+    save_dict["annotations"] += coco_annotation_dict_list
+    json_path = f"{annos_folder}/{str(img_id).zfill(5) + '.json'}"
+    with open(json_path, "w") as f:
+        json.dump(save_dict, f)
+    if vis_fp:
+        visualize_room_polygons(
+            filtered_mask,
+            combined_polygon_list,
+            list(ROOM_NAMES.values()) + ["window", "door"],
+            save_path=f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5)}_combined.png",
+        )
+        visualize_room_polygons(
+            new_filtered_mask,
+            output_polygon_list,
+            ["null"] + list(class_to_index_dict.keys()),
+            save_path=f"{save_dir.rstrip('/') + '_aux'}/{str(img_id).zfill(5)}_final.png",
+        )
+def prepare_dict(categories_dict):
+    save_dict = {"images": [], "annotations": [], "categories": []}
+    for key, value in categories_dict.items():
+        type_dict = {"supercategory": "room", "id": value, "name": key}
+        save_dict["categories"].append(type_dict)
+    return save_dict
+if __name__ == "__main__":
+    # Script entry point: run process_floorplan over every sample of the train/val/test
+    # splits, using a pool of worker processes.
+    args = config()
+    ### prepare
+    # Output layout created below:
+    #   <output>/{train,val,test}                   image folders (passed as save_dir)
+    #   <output>/annotations_json/{train,val,test}  annotation folders (passed as annos_folder)
+    outFolder = args.output
+    if not os.path.exists(outFolder):
+        os.mkdir(outFolder)
+    annotation_outFolder = os.path.join(outFolder, "annotations_json")
+    if not os.path.exists(annotation_outFolder):
+        os.mkdir(annotation_outFolder)
+    annos_train_folder = os.path.join(annotation_outFolder, "train")
+    annos_val_folder = os.path.join(annotation_outFolder, "val")
+    annos_test_folder = os.path.join(annotation_outFolder, "test")
+    os.makedirs(annos_train_folder, exist_ok=True)
+    os.makedirs(annos_val_folder, exist_ok=True)
+    os.makedirs(annos_test_folder, exist_ok=True)
+    train_img_folder = os.path.join(outFolder, "train")
+    val_img_folder = os.path.join(outFolder, "val")
+    test_img_folder = os.path.join(outFolder, "test")
+    for img_folder in [train_img_folder, val_img_folder, test_img_folder]:
+        if not os.path.exists(img_folder):
+            os.mkdir(img_folder)
+    # Paths of per-split merged JSON files. They are only collected into coco_json_paths
+    # and read into json_path below; nothing in this block writes to them.
+    coco_train_json_path = os.path.join(annotation_outFolder, "train.json")
+    coco_val_json_path = os.path.join(annotation_outFolder, "val.json")
+    coco_test_json_path = os.path.join(annotation_outFolder, "test.json")
+    ### begin processing
+    start_scene_id = 3500  # following index of s3d data
+    # The four lists below are parallel: index i describes split i.
+    split_set = ["train.txt", "val.txt", "test.txt"]
+    save_folders = [train_img_folder, val_img_folder, test_img_folder]
+    coco_json_paths = [coco_train_json_path, coco_val_json_path, coco_test_json_path]
+    annos_folders = [annos_train_folder, annos_val_folder, annos_test_folder]
+    def wrapper(scene_id):
+        # Pool task: process the sample at position scene_id of the current split.
+        # dataset, save_dir, annos_folder and start_scene_id are read as module globals;
+        # they are (re)assigned in the split loop below before each Pool is created.
+        # NOTE(review): this appears to rely on workers inheriting those globals from the
+        # parent (fork start method); with spawn only `dataset` is set, via worker_init.
+        image_set = dataset[scene_id]
+        process_floorplan(
+            image_set,
+            scene_id,
+            start_scene_id,
+            args,
+            save_dir,
+            annos_folder,
+            use_org_cc5k_classs=True,
+            # debug visualizations only for the first 100 samples of each split
+            vis_fp=scene_id < 100,
+            wd2line=not args.disable_wd2line,
+        )
+    def worker_init(dataset_obj):
+        # Store dataset as global to avoid pickling issues
+        global dataset
+        dataset = dataset_obj
+    for split_id, split_file in enumerate(split_set):
+        dataset = FloorplanSVG(args.data_root, split_file, format="txt", original_size=False)
+        save_dir = save_folders[split_id]
+        json_path = coco_json_paths[split_id]  # not used further in this loop
+        print(f"############# {split_file}")
+        annos_folder = annos_folders[split_id]
+        num_processes = 16
+        with Pool(num_processes, initializer=worker_init, initargs=(dataset,)) as p:
+            indices = range(len(dataset))
+            # list(...) drains the lazy imap iterator so that every task actually runs
+            list(tqdm(p.imap(wrapper, indices), total=len(dataset)))
+        # advance the id offset so the next split starts after this split's samples
+        start_scene_id += len(dataset)

data_preprocess/cubicasa5k/floorplan_extraction.py ADDED Viewed

	@@ -0,0 +1,403 @@

+import argparse
+import glob
+import json
+import os
+import sys
+from multiprocessing import Pool
+from pathlib import Path
+import cv2
+import numpy as np
+from shapely.geometry import Polygon
+from tqdm import tqdm
+sys.path.append(str(Path(__file__).resolve().parent.parent))
+from common_utils import resort_corners
+from create_coco_cc5k import create_coco_bounding_box
+from util.plot_utils import plot_semantic_rich_floorplan_opencv
+def plot_floor(output_coco_polygons, categories_dict, img_w, img_h, save_path, door_window_index=[10, 9]):
+    gt_sem_rich = []
+    for j, (poly, poly_type) in enumerate(output_coco_polygons):
+        corners = np.array(poly).reshape(-1, 2).astype(np.int32)
+        # corners_flip_y = corners.copy()
+        # corners_flip_y[:,1] = 255 - corners_flip_y[:,1]
+        # corners = corners_flip_y
+        gt_sem_rich.append([corners, poly_type])
+    # plot_semantic_rich_floorplan_nicely(gt_sem_rich, save_path, prec=None, rec=None,
+    #                                     plot_text=True, is_bw=False,
+    #                                     door_window_index=door_window_index,
+    #                                     img_w=img_w,
+    #                                     img_h=img_h,
+    #                                     semantics_label_mapping=get_dataset_class_labels(categories_dict),
+    #                                     )
+    plot_semantic_rich_floorplan_opencv(
+        gt_sem_rich,
+        save_path,
+        img_w=img_w,
+        img_h=img_h,
+        door_window_index=door_window_index,
+        semantics_label_mapping=get_dataset_class_labels(categories_dict),
+        is_bw=False,
+    )
+def prepare_dict(categories_dict):
+    save_dict = {"images": [], "annotations": [], "categories": categories_dict}
+    return save_dict
+def extract_polygons_from_mask(binary_mask, output_mask_path):
+    """
+    Extract polygons from a binary mask where regions with value 1 are polygons
+    and background regions have value 0.
+    Args:
+        binary_mask (numpy.ndarray): Binary mask with shape (H, W), where 1 represents
+                                     the polygon regions and 0 represents the background.
+    Returns:
+        list: A list of polygons, where each polygon is represented as a list of (x, y) coordinates.
+    """
+    # Ensure the mask is binary (0 and 1)
+    binary_mask = (binary_mask > 0).astype(np.uint8)
+    # Find contours in the binary mask
+    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # Extract polygons from contours
+    polygons = []
+    for contour in contours:
+        # Approximate the contour to reduce the number of points
+        epsilon = 0.001 * cv2.arcLength(contour, True)  # Adjust epsilon for more/less detail
+        approx_polygon = cv2.approxPolyDP(contour, epsilon, True)
+        polygons.append(approx_polygon.squeeze().tolist())  # Convert to list of (x, y) points
+    # Convert binary_mask to a 3-channel image to draw colored polylines
+    binary_mask_colored = cv2.cvtColor(binary_mask * 255, cv2.COLOR_GRAY2BGR)
+    # Plot polygons on the binary mask with green color
+    for polygon in polygons:
+        points = np.array(polygon, dtype=np.int32)
+        cv2.polylines(binary_mask_colored, [points], isClosed=True, color=(0, 0, 255), thickness=10)
+    cv2.imwrite(output_mask_path, binary_mask_colored)
+    return polygons
+def read_polygons_from_json(json_file):
+    with open(json_file, "r") as f:
+        data = json.load(f)
+        category_dict = data["categories"]
+        polygons = [data["annotations"][i]["segmentation"][0] for i in range(len(data["annotations"]))]
+        poly_types = [data["annotations"][i]["category_id"] for i in range(len(data["annotations"]))]
+        source_misc = [data["annotations"][i] for i in range(len(data["annotations"]))]
+        source_polygons = [(polygons[i], poly_types[i]) for i in range(len(polygons))]
+    return source_polygons, source_misc, category_dict
+def get_dataset_class_labels(category_dict):
+    return {category_dict[i]["id"]: category_dict[i]["name"] for i in range(len(category_dict))}
+def check_all_window_door_inside(polygons, door_window_index):
+    flag = all([poly_type in door_window_index for _, poly_type in polygons])
+    return flag
+def extract_region_and_annotation(
+    source_image,
+    source_annot_path,
+    region_polygons,
+    start_image_id,
+    output_image_dir="output",
+    output_annot_dir="annotations",
+    output_aux_dir="output_aux",
+    vis_aux=True,
+):
+    """
+    Extract regions of the floorplan from the source image based on polygons
+    and generate annotations.
+    Args:
+        source_image (numpy.ndarray): The source image (H, W, 3).
+        polygons (list): List of polygons, where each polygon is a list of (x, y) coordinates.
+        output_dir (str): Directory to save the extracted regions and annotations.
+    Returns:
+        list: A list of annotations for each extracted region.
+    """
+    door_window_index = [10, 9]
+    source_polygons, source_misc, categories_dict = read_polygons_from_json(source_annot_path)
+    source_img_id = os.path.basename(source_annot_path).split(".")[0].zfill(5)
+    if vis_aux:
+        gt_sem_rich_path = os.path.join(output_aux_dir, "{}_org_floor.png".format(source_img_id))
+        plot_floor(
+            source_polygons,
+            categories_dict,
+            source_image.shape[1],
+            source_image.shape[0],
+            gt_sem_rich_path,
+            door_window_index=door_window_index,
+        )
+    margin = 10
+    img_id = start_image_id
+    # each region polygon corresponds to an image
+    for i, polygon in enumerate(region_polygons):
+        instance_id = 0
+        output_coco_polygons = []
+        # Create a mask for the current polygon
+        mask = np.zeros(source_image.shape[:2], dtype=np.uint8)
+        points = np.array(polygon, dtype=np.int32)
+        cv2.fillPoly(mask, [points], 255)
+        # Crop the ROI to the bounding box of the polygon
+        x, y, w, h = cv2.boundingRect(points)
+        # Expand the bounding box by the margin
+        x_expanded = max(x - margin, 0)
+        y_expanded = max(y - margin, 0)
+        w_expanded = min(x + w + margin, source_image.shape[1]) - x_expanded
+        h_expanded = min(y + h + margin, source_image.shape[0]) - y_expanded
+        x, y, w, h = x_expanded, y_expanded, w_expanded, h_expanded
+        cropped_roi = source_image[y : y + h, x : x + w]
+        save_dict = prepare_dict(categories_dict)
+        # Create an annotation for the extracted region
+        img_dict = {}
+        img_dict["file_name"] = f"{str(img_id).zfill(5)}_{source_img_id}.png"
+        img_dict["id"] = img_id
+        img_dict["width"] = w
+        img_dict["height"] = h
+        # Save the cropped ROI
+        roi_filename = f"{output_image_dir}/{str(img_id).zfill(5)}_{source_img_id}.png"
+        cv2.imwrite(roi_filename, cropped_roi)
+        bounding_box = np.array([x, y, x + w, y + h])
+        # Convert source polygons to NumPy arrays for vectorized operations
+        source_polygons_np = [np.array(src_poly[0]).reshape(-1, 2) for src_poly in source_polygons]
+        assert len(source_polygons_np) == len(source_polygons)
+        coco_annotation_dict_list = []
+        # Iterate through the polygons and filter those inside the bounding box
+        for j, tmp in enumerate(source_polygons_np):
+            # Compute the bounding box of the current polygon
+            poly_bbox = np.hstack([np.min(tmp, axis=0), np.max(tmp, axis=0)])
+            # Check if the polygon is outside the bounding box
+            if np.any(poly_bbox[:2] < bounding_box[:2]) or np.any(poly_bbox[2:] > bounding_box[2:]):
+                continue
+            # Scale the polygon coordinates relative to the top-left corner of the bounding box
+            scaled_polygon = tmp - bounding_box[:2]
+            coco_seg_poly = []
+            poly_sorted = resort_corners(scaled_polygon)
+            # image = draw_polygon_on_image(image, poly_shapely, "test_poly.jpg")
+            for p in poly_sorted:
+                coco_seg_poly += list(p)
+            if len(scaled_polygon) == 2:
+                area = source_misc[j]["area"]
+                coco_bb = source_misc[j]["bbox"]
+                # shift the bounding box
+                coco_bb[0] -= bounding_box[0]
+                coco_bb[1] -= bounding_box[1]
+            else:
+                poly_shapely = Polygon(scaled_polygon)
+                area = poly_shapely.area
+                rectangle_shapely = poly_shapely.envelope
+                # Slightly wider bounding box
+                bb_x, bb_y = rectangle_shapely.exterior.xy
+                coco_bb = create_coco_bounding_box(bb_x, bb_y, w, h, bound_pad=2)
+            coco_annotation_dict = {
+                "segmentation": [coco_seg_poly],
+                "area": area,
+                "iscrowd": 0,
+                "image_id": img_id,
+                "bbox": coco_bb,
+                "category_id": source_polygons[j][1],
+                "id": instance_id,
+            }
+            coco_annotation_dict_list.append(coco_annotation_dict)
+            output_coco_polygons.append([coco_seg_poly, source_polygons[j][1]])
+            # Remove after obtaining the polygon
+            # source_polygons.pop(j)
+            # source_misc.pop(j)
+            instance_id += 1
+        # skip if just windows and doors are inside
+        if check_all_window_door_inside(output_coco_polygons, door_window_index):
+            instance_id -= len(coco_annotation_dict_list)
+            continue
+        save_dict["images"].append(img_dict)
+        save_dict["annotations"] += coco_annotation_dict_list
+        if vis_aux:
+            gt_sem_rich_path = os.path.join(
+                output_aux_dir, "{}_{}_floor.png".format(str(img_id).zfill(5), source_img_id)
+            )
+            plot_floor(
+                output_coco_polygons, categories_dict, w, h, gt_sem_rich_path, door_window_index=door_window_index
+            )
+        # Save annotations to a JSON file
+        json_path = f"{output_annot_dir}/{str(img_id).zfill(5)}_{source_img_id}.json"
+        with open(json_path, "w") as f:
+            json.dump(save_dict, f)
+        img_id += 1
+    start_image_id = img_id
+    return start_image_id
+def config():
+    a = argparse.ArgumentParser(description="Generate coco format data for Structured3D")
+    a.add_argument(
+        "--data_root", default="Structured3D_panorama", type=str, help="path to raw Structured3D_panorama folder"
+    )
+    a.add_argument("--output", default="coco_cubicasa5k", type=str, help="path to output folder")
+    args = a.parse_args()
+    return args
+# Script entry point: split every converted floorplan image into one crop per connected
+# mask region and write the matching per-crop annotations.
+if __name__ == "__main__":
+    args = config()
+    ### prepare
+    # Output layout created below:
+    #   <output>/{train,val,test}                   cropped images
+    #   <output>/{train,val,test}_aux               debug visualizations
+    #   <output>/annotations_json/{train,val,test}  per-crop JSON files
+    outFolder = args.output
+    if not os.path.exists(outFolder):
+        os.mkdir(outFolder)
+    annotation_outFolder = os.path.join(outFolder, "annotations_json")
+    if not os.path.exists(annotation_outFolder):
+        os.mkdir(annotation_outFolder)
+    annos_train_folder = os.path.join(annotation_outFolder, "train")
+    annos_val_folder = os.path.join(annotation_outFolder, "val")
+    annos_test_folder = os.path.join(annotation_outFolder, "test")
+    os.makedirs(annos_train_folder, exist_ok=True)
+    os.makedirs(annos_val_folder, exist_ok=True)
+    os.makedirs(annos_test_folder, exist_ok=True)
+    train_img_folder = os.path.join(outFolder, "train")
+    val_img_folder = os.path.join(outFolder, "val")
+    test_img_folder = os.path.join(outFolder, "test")
+    for img_folder in [train_img_folder, val_img_folder, test_img_folder]:
+        if not os.path.exists(img_folder):
+            os.mkdir(img_folder)
+    ### begin processing
+    start_image_id = 3500
+    # The three lists below are parallel: index i describes split i.
+    save_folders = [train_img_folder, val_img_folder, test_img_folder]
+    annos_folders = [annos_train_folder, annos_val_folder, annos_test_folder]
+    splits = ["train", "val", "test"]
+    def wrapper(index):
+        # Pool task: process the index-th (image, annotation, mask) triple of the current split.
+        # save_path, save_anno_path, save_aux_path and start_image_id are read as module
+        # globals; they are (re)assigned in the split loop below before each Pool is created.
+        # NOTE(review): this appears to rely on workers inheriting those globals from the
+        # parent (fork start method); with spawn only packed_input_files is set, via worker_init.
+        image_path, annot_path, mask_path = packed_input_files[index]
+        cur_image_id = int(os.path.basename(image_path).split(".")[0])
+        # NOTE(review): cv2.imread returns None for an unreadable file, which would make
+        # the indexing below fail — confirm every image has a mask.
+        binary_mask = cv2.imread(mask_path)[:, :, -1]
+        source_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
+        # Extract polygons
+        region_polygons = extract_polygons_from_mask(
+            binary_mask, output_mask_path=f"{save_aux_path}/{str(cur_image_id).zfill(5)}_polylines.png"
+        )
+        return extract_region_and_annotation(
+            source_image,
+            annot_path,
+            region_polygons,
+            # Each source image gets its own block of 10 consecutive image ids so that
+            # workers need no shared counter. NOTE(review): a source image with more than
+            # 10 kept regions would run into the next image's id block.
+            start_image_id + index * 10,
+            save_path,
+            save_anno_path,
+            save_aux_path,
+            vis_aux=True,
+        )
+    def worker_init(input_files_object):
+        # Store dataset as global to avoid pickling issues
+        global packed_input_files
+        packed_input_files = input_files_object
+    for i, split in enumerate(splits):
+        # Inputs of one split: images, their annotation JSONs and their "<id>_mask.png" masks,
+        # all located from the image file names.
+        image_files = sorted(glob.glob(f"{args.data_root}/{split}/*.png"))
+        image_id_list = [os.path.basename(image_path).split(".")[0] for image_path in image_files]
+        anno_files = [f"{args.data_root}/annotations_json/{split}/{id_}.json" for id_ in image_id_list]
+        mask_files = [f"{args.data_root}/{split}_aux/{id_}_mask.png" for id_ in image_id_list]
+        save_path = save_folders[i]
+        save_anno_path = annos_folders[i]
+        save_aux_path = save_path.rstrip("/") + "_aux"
+        os.makedirs(save_aux_path, exist_ok=True)
+        # (A commented-out serial version of the loop below, which threaded start_image_id
+        # through extract_region_and_annotation instead of reserving id blocks, was removed.)
+        packed_input_files = list(zip(image_files, anno_files, mask_files))
+        num_processes = 16
+        with Pool(num_processes, initializer=worker_init, initargs=(packed_input_files,)) as p:
+            indices = [j for j in range(len(packed_input_files))]
+            # list(...) drains the lazy imap iterator so that every task actually runs
+            list(tqdm(p.imap(wrapper, indices), total=len(indices)))
+        # skip past the id blocks reserved for this split
+        start_image_id += len(packed_input_files) * 10

data_preprocess/cubicasa5k/house.py ADDED Viewed

	@@ -0,0 +1,1131 @@

+import copy
+from xml.dom import minidom
+import cv2
+import numpy as np
+from skimage.draw import polygon
+from svg_utils import (
+    PolygonWall,
+    calc_distance,
+    get_direction,
+    get_gaussian2D,
+    get_icon,
+    get_icon_number,
+    get_points,
+    get_room_number,
+)
+# Full room/structure vocabulary: label name -> integer id.
+# Ids are consecutive from 0 ("Background") to 64 ("Stairs").
+all_rooms = {
+    "Background": 0,  # Not in data. The default outside label
+    "Alcove": 1,
+    "Attic": 2,
+    "Ballroom": 3,
+    "Bar": 4,
+    "Basement": 5,
+    "Bath": 6,
+    "Bedroom": 7,
+    "Below150cm": 8,
+    "CarPort": 9,
+    "Church": 10,
+    "Closet": 11,
+    "ConferenceRoom": 12,
+    "Conservatory": 13,
+    "Counter": 14,
+    "Den": 15,
+    "Dining": 16,
+    "DraughtLobby": 17,
+    "DressingRoom": 18,
+    "EatingArea": 19,
+    "Elevated": 20,
+    "Elevator": 21,
+    "Entry": 22,
+    "ExerciseRoom": 23,
+    "Garage": 24,
+    "Garbage": 25,
+    "Hall": 26,
+    "HallWay": 27,
+    "HotTub": 28,
+    "Kitchen": 29,
+    "Library": 30,
+    "LivingRoom": 31,
+    "Loft": 32,
+    "Lounge": 33,
+    "MediaRoom": 34,
+    "MeetingRoom": 35,
+    "Museum": 36,
+    "Nook": 37,
+    "Office": 38,
+    "OpenToBelow": 39,
+    "Outdoor": 40,
+    "Pantry": 41,
+    "Reception": 42,
+    "RecreationRoom": 43,
+    "RetailSpace": 44,
+    "Room": 45,
+    "Sanctuary": 46,
+    "Sauna": 47,
+    "ServiceRoom": 48,
+    "ServingArea": 49,
+    "Skylights": 50,
+    "Stable": 51,
+    "Stage": 52,
+    "StairWell": 53,
+    "Storage": 54,
+    "SunRoom": 55,
+    "SwimmingPool": 56,
+    "TechnicalRoom": 57,
+    "Theatre": 58,
+    "Undefined": 59,
+    "UserDefined": 60,
+    "Utility": 61,
+    "Wall": 62,
+    "Railing": 63,
+    "Stairs": 64,
+}
+# Reduced room vocabulary: label name -> merged class id in 0..11 (several labels share an
+# id; 11 collects all labels without a class of their own). Used as the default
+# ``room_list`` of House.__init__.
+# "Below150cm" and "Stairs" from all_rooms have no entry here.
+# Dining, EatingArea, LivingRoom and Lounge all map to id 4, although room_name_map
+# names them "Dining" and "LivingRoom" separately.
+rooms_selected = {
+    "Alcove": 11,
+    "Attic": 11,
+    "Ballroom": 11,
+    "Bar": 11,
+    "Basement": 11,
+    "Bath": 6,
+    "Bedroom": 5,
+    "CarPort": 10,
+    "Church": 11,
+    "Closet": 9,
+    "ConferenceRoom": 11,
+    "Conservatory": 11,
+    "Counter": 11,
+    "Den": 11,
+    "Dining": 4,
+    "DraughtLobby": 7,
+    "DressingRoom": 9,
+    "EatingArea": 4,
+    "Elevated": 11,
+    "Elevator": 11,
+    "Entry": 7,
+    "ExerciseRoom": 11,
+    "Garage": 10,
+    "Garbage": 11,
+    "Hall": 11,
+    "HallWay": 7,
+    "HotTub": 11,
+    "Kitchen": 3,
+    "Library": 11,
+    "LivingRoom": 4,
+    "Loft": 11,
+    "Lounge": 4,
+    "MediaRoom": 11,
+    "MeetingRoom": 11,
+    "Museum": 11,
+    "Nook": 11,
+    "Office": 11,
+    "OpenToBelow": 11,
+    "Outdoor": 1,
+    "Pantry": 11,
+    "Reception": 11,
+    "RecreationRoom": 11,
+    "RetailSpace": 11,
+    "Room": 11,
+    "Sanctuary": 11,
+    "Sauna": 6,
+    "ServiceRoom": 11,
+    "ServingArea": 11,
+    "Skylights": 11,
+    "Stable": 11,
+    "Stage": 11,
+    "StairWell": 11,
+    "Storage": 9,
+    "SunRoom": 11,
+    "SwimmingPool": 11,
+    "TechnicalRoom": 11,
+    "Theatre": 11,
+    "Undefined": 11,
+    "UserDefined": 11,
+    "Utility": 11,
+    "Background": 0,  # Not in data. The default outside label
+    "Wall": 2,
+    "Railing": 8,
+}
+# Label name -> name of the merged room class it belongs to.
+# Unlike rooms_selected this table has an entry for "Below150cm"; "Stairs" is absent from both.
+room_name_map = {
+    "Alcove": "Room",
+    "Attic": "Room",
+    "Ballroom": "Room",
+    "Bar": "Room",
+    "Basement": "Room",
+    "Bath": "Bath",
+    "Bedroom": "Bedroom",
+    "Below150cm": "Room",
+    "CarPort": "Garage",
+    "Church": "Room",
+    "Closet": "Storage",
+    "ConferenceRoom": "Room",
+    "Conservatory": "Room",
+    "Counter": "Room",
+    "Den": "Room",
+    "Dining": "Dining",
+    "DraughtLobby": "Entry",
+    "DressingRoom": "Storage",
+    "EatingArea": "Dining",
+    "Elevated": "Room",
+    "Elevator": "Room",
+    "Entry": "Entry",
+    "ExerciseRoom": "Room",
+    "Garage": "Garage",
+    "Garbage": "Room",
+    "Hall": "Room",
+    "HallWay": "Entry",
+    "HotTub": "Room",
+    "Kitchen": "Kitchen",
+    "Library": "Room",
+    "LivingRoom": "LivingRoom",
+    "Loft": "Room",
+    "Lounge": "LivingRoom",
+    "MediaRoom": "Room",
+    "MeetingRoom": "Room",
+    "Museum": "Room",
+    "Nook": "Room",
+    "Office": "Room",
+    "OpenToBelow": "Room",
+    "Outdoor": "Outdoor",
+    "Pantry": "Room",
+    "Reception": "Room",
+    "RecreationRoom": "Room",
+    "RetailSpace": "Room",
+    "Room": "Room",
+    "Sanctuary": "Room",
+    "Sauna": "Bath",
+    "ServiceRoom": "Room",
+    "ServingArea": "Room",
+    "Skylights": "Room",
+    "Stable": "Room",
+    "Stage": "Room",
+    "StairWell": "Room",
+    "Storage": "Storage",
+    "SunRoom": "Room",
+    "SwimmingPool": "Room",
+    "TechnicalRoom": "Room",
+    "Theatre": "Room",
+    "Undefined": "Room",
+    "UserDefined": "Room",
+    "Utility": "Room",
+    "Wall": "Wall",
+    "Railing": "Railing",
+    "Background": "Background",  # Not in data. The default outside label
+}
+# Full icon vocabulary: label name -> integer id.
+# Ids are consecutive from 0 ("Empty") to 51 ("ShowerPlatform").
+all_icons = {
+    "Empty": 0,
+    "Window": 1,
+    "Door": 2,
+    "BaseCabinet": 3,
+    "BaseCabinetRound": 4,
+    "BaseCabinetTriangle": 5,
+    "Bathtub": 6,
+    "BathtubRound": 7,
+    "Chimney": 8,
+    "Closet": 9,
+    "ClosetRound": 10,
+    "ClosetTriangle": 11,
+    "CoatCloset": 12,
+    "CoatRack": 13,
+    "CornerSink": 14,
+    "CounterTop": 15,
+    "DoubleSink": 16,
+    "DoubleSinkRight": 17,
+    "ElectricalAppliance": 18,
+    "Fireplace": 19,
+    "FireplaceCorner": 20,
+    "FireplaceRound": 21,
+    "GasStove": 22,
+    "Housing": 23,
+    "Jacuzzi": 24,
+    "PlaceForFireplace": 25,
+    "PlaceForFireplaceCorner": 26,
+    "PlaceForFireplaceRound": 27,
+    "RoundSink": 28,
+    "SaunaBenchHigh": 29,
+    "SaunaBenchLow": 30,
+    "SaunaBenchMid": 31,
+    "Shower": 32,
+    "ShowerCab": 33,
+    "ShowerScreen": 34,
+    "ShowerScreenRoundLeft": 35,
+    "ShowerScreenRoundRight": 36,
+    "SideSink": 37,
+    "Sink": 38,
+    "Toilet": 39,
+    "Urinal": 40,
+    "WallCabinet": 41,
+    "WaterTap": 42,
+    "WoodStove": 43,
+    "Misc": 44,
+    "SaunaBench": 45,
+    "SaunaStove": 46,
+    "WashingMachine": 47,
+    "IntegratedStove": 48,
+    "Dishwasher": 49,
+    "GeneralAppliance": 50,
+    "ShowerPlatform": 51,
+}
+# Reduced icon vocabulary: label name -> merged class id in 1..10, or None for labels
+# without a class. Used as the default ``icon_list`` of House.__init__.
+# These all_icons labels have no entry here: "Empty", "SaunaStove", "WashingMachine",
+# "IntegratedStove", "Dishwasher" and "GeneralAppliance".
+icons_selected = {
+    "Window": 1,
+    "Door": 2,
+    "Closet": 3,
+    "ClosetRound": 3,
+    "ClosetTriangle": 3,
+    "CoatCloset": 3,
+    "CoatRack": 3,
+    "CounterTop": 3,
+    "Housing": 3,
+    "ElectricalAppliance": 4,
+    "WoodStove": 4,
+    "GasStove": 4,
+    "Toilet": 5,
+    "Urinal": 5,
+    "SideSink": 6,
+    "Sink": 6,
+    "RoundSink": 6,
+    "CornerSink": 6,
+    "DoubleSink": 6,
+    "DoubleSinkRight": 6,
+    "WaterTap": 6,
+    "SaunaBenchHigh": 7,
+    "SaunaBenchLow": 7,
+    "SaunaBenchMid": 7,
+    "SaunaBench": 7,
+    "Fireplace": 8,
+    "FireplaceCorner": 8,
+    "FireplaceRound": 8,
+    "PlaceForFireplace": 8,
+    "PlaceForFireplaceCorner": 8,
+    "PlaceForFireplaceRound": 8,
+    "Bathtub": 9,
+    "BathtubRound": 9,
+    "Chimney": 10,
+    "Misc": None,
+    "BaseCabinetRound": None,
+    "BaseCabinetTriangle": None,
+    "BaseCabinet": None,
+    "WallCabinet": None,
+    "Shower": None,
+    "ShowerCab": None,
+    "ShowerPlatform": None,
+    "ShowerScreen": None,
+    "ShowerScreenRoundRight": None,
+    "ShowerScreenRoundLeft": None,
+    "Jacuzzi": None,
+}
+# Label name -> name of the merged icon class it belongs to; None marks labels without a class.
+# This table also covers "SaunaStove", "WashingMachine", "IntegratedStove", "Dishwasher"
+# and "GeneralAppliance", which icons_selected does not list.
+icon_name_map = {
+    "Window": "Window",
+    "Door": "Door",
+    "Closet": "Closet",
+    "ClosetRound": "Closet",
+    "ClosetTriangle": "Closet",
+    "CoatCloset": "Closet",
+    "CoatRack": "Closet",
+    "CounterTop": "Closet",
+    "Housing": "Closet",
+    "ElectricalAppliance": "ElectricalAppliance",
+    "WoodStove": "ElectricalAppliance",
+    "GasStove": "ElectricalAppliance",
+    "SaunaStove": "ElectricalAppliance",
+    "Toilet": "Toilet",
+    "Urinal": "Toilet",
+    "SideSink": "Sink",
+    "Sink": "Sink",
+    "RoundSink": "Sink",
+    "CornerSink": "Sink",
+    "DoubleSink": "Sink",
+    "DoubleSinkRight": "Sink",
+    "WaterTap": "Sink",
+    "SaunaBenchHigh": "SaunaBench",
+    "SaunaBenchLow": "SaunaBench",
+    "SaunaBenchMid": "SaunaBench",
+    "SaunaBench": "SaunaBench",
+    "Fireplace": "Fireplace",
+    "FireplaceCorner": "Fireplace",
+    "FireplaceRound": "Fireplace",
+    "PlaceForFireplace": "Fireplace",
+    "PlaceForFireplaceCorner": "Fireplace",
+    "PlaceForFireplaceRound": "Fireplace",
+    "Bathtub": "Bathtub",
+    "BathtubRound": "Bathtub",
+    "Chimney": "Chimney",
+    "Misc": None,
+    "BaseCabinetRound": None,
+    "BaseCabinetTriangle": None,
+    "BaseCabinet": None,
+    "WallCabinet": None,
+    # NOTE(review): the next three values are the STRING "None", not the None object used
+    # by the neighbouring entries, so an `is None` check will not match them — confirm
+    # whether this is intended.
+    "Shower": "None",
+    "ShowerCab": "None",
+    "ShowerPlatform": "None",
+    "ShowerScreen": None,
+    "ShowerScreenRoundRight": None,
+    "ShowerScreenRoundLeft": None,
+    "Jacuzzi": None,
+    "WashingMachine": None,
+    "IntegratedStove": "ElectricalAppliance",
+    "Dishwasher": "ElectricalAppliance",
+    "GeneralAppliance": "ElectricalAppliance",
+}
+def complete_polygons(polygons, polygon_types):
+    new_polygons = []
+    new_types = []
+    for poly, poly_type in zip(polygons, polygon_types):
+        if len(poly) < 3:
+            print(f"Class {poly_type} has less than 3 points. Skipped!")
+            continue
+        poly_array = np.array(poly)
+        t = copy.copy(poly)
+        # append the beginning point
+        if len(poly_array) > 2 and (poly_array[0] != poly_array[-1]).any():
+            t.append(poly[0])
+        new_polygons.append(t)
+        new_types.append(poly_type)
+    return new_polygons, new_types
+class House:
+    def __init__(self, path, height, width, icon_list=icons_selected, room_list=rooms_selected):
+        """Parse a floor-plan SVG into label rasters and vector annotations.
+
+        Every ``<g>`` element of the SVG is inspected: walls and railings, windows,
+        doors, fixed furniture (icons) and spaces (rooms) are rasterised into
+        ``self.walls`` / ``self.icons`` and recorded as polygons, corner points
+        and entries of ``self.representation``.
+
+        Args:
+            path: SVG location, handed to ``minidom.parse``.
+            height: number of rows of the label rasters.
+            width: number of columns of the label rasters.
+            icon_list: icon name -> label mapping passed to ``get_icon_number``.
+            room_list: room name -> label mapping; its "Wall" and "Railing"
+                entries label wall pixels and it is passed to ``get_room_number``.
+
+        Raises:
+            ValueError: re-raised from wall construction unless its message is
+                exactly "small wall" (those elements are skipped).
+        """
+        self.height = height
+        self.width = width
+        shape = height, width
+        svg = minidom.parse(path)
+        # Label rasters are uint8.
+        # NOTE(review): wall_ids is uint8 as well, so a plan with more than 255
+        # wall/railing elements would wrap around -- confirm this cannot happen.
+        self.walls = np.empty((height, width), dtype=np.uint8)
+        self.walls.fill(0)
+        self.wall_ids = np.empty((height, width), dtype=np.uint8)
+        self.wall_ids.fill(0)
+        self.icons = np.zeros((height, width), dtype=np.uint8)
+        # junction_id = 0
+        wall_id = 1
+        self.wall_ends = []
+        self.wall_objs = []
+        self.icon_types = []
+        self.room_types = []
+        self.icon_corners = {"upper_left": [], "upper_right": [], "lower_left": [], "lower_right": []}
+        self.opening_corners = {"left": [], "right": [], "up": [], "down": []}
+        self.representation = {"doors": [], "icons": [], "labels": [], "walls": []}
+        self.icon_areas = []
+        self.wall_coords = []
+        self.icon_coords = []
+        for e in svg.getElementsByTagName("g"):
+            try:
+                # Walls and railings are handled identically apart from their label.
+                # wall_id only advances after a wall was built, so ids stay contiguous.
+                if e.getAttribute("id") == "Wall":
+                    wall = PolygonWall(e, wall_id, shape)
+                    wall.rr, wall.cc = self._clip_outside(wall.rr, wall.cc)
+                    self.wall_objs.append(wall)
+                    self.walls[wall.rr, wall.cc] = room_list["Wall"]
+                    self.wall_ids[wall.rr, wall.cc] = wall_id
+                    self.wall_ends.append(wall.end_points)
+                    Y, X = self._clip_outside(wall.Y, wall.X)
+                    self.wall_coords.append([(x, y) for x, y in zip(X, Y)])
+                    self.room_types.append(room_list["Wall"])
+                    wall_id += 1
+                if e.getAttribute("id") == "Railing":
+                    wall = PolygonWall(e, wall_id, shape)
+                    wall.rr, wall.cc = self._clip_outside(wall.rr, wall.cc)
+                    self.wall_objs.append(wall)
+                    self.walls[wall.rr, wall.cc] = room_list["Railing"]
+                    self.wall_ids[wall.rr, wall.cc] = wall_id
+                    self.wall_ends.append(wall.end_points)
+                    Y, X = self._clip_outside(wall.Y, wall.X)
+                    self.wall_coords.append([(x, y) for x, y in zip(X, Y)])
+                    self.room_types.append(room_list["Railing"])
+                    wall_id += 1
+            except ValueError as k:
+                # Only the "small wall" error is tolerated; the element is then
+                # skipped entirely (none of the checks below run for it).
+                if str(k) != "small wall":
+                    raise k
+                continue
+            if e.getAttribute("id") == "Window":
+                X, Y = get_points(e)
+                # NOTE(review): polygon() receives (X, Y) here but (Y, X) for spaces
+                # below; the swapped rr/cc names keep icons[...] indexed as [y, x],
+                # assuming polygon() returns its outputs in argument order -- confirm.
+                rr, cc = polygon(X, Y)
+                cc, rr = self._clip_outside(cc, rr)
+                direction = get_direction(X, Y)
+                locs = np.column_stack((X, Y))
+                if direction == "H":
+                    # the two corners with the smallest x form the left end,
+                    # the remaining corners the right end
+                    left_index = np.argmin(locs[:, 0])
+                    left1 = locs[left_index]
+                    locs = np.delete(locs, left_index, axis=0)
+                    left_index = np.argmin(locs[:, 0])
+                    left2 = locs[left_index]
+                    right = np.delete(locs, left_index, axis=0)
+                    left = np.array([left1, left2])
+                    point_left = left.mean(axis=0)
+                    point_right = right.mean(axis=0)
+                    self.opening_corners["left"].append(point_left)
+                    self.opening_corners["right"].append(point_right)
+                    # windows are recorded under "doors" with the same tag as doors
+                    door_rep = [[list(point_left), list(point_right)], ["door", 1, 1]]
+                    self.representation["doors"].append(door_rep)
+                else:
+                    # the two corners with the smallest y form the upper end
+                    up_index = np.argmin(locs[:, 1])
+                    up1 = locs[up_index]
+                    locs = np.delete(locs, up_index, axis=0)
+                    up_index = np.argmin(locs[:, 1])
+                    up2 = locs[up_index]
+                    down = np.delete(locs, up_index, axis=0)
+                    up = np.array([up1, up2])
+                    point_up = up.mean(axis=0)
+                    point_down = down.mean(axis=0)
+                    self.opening_corners["up"].append(point_up)
+                    self.opening_corners["down"].append(point_down)
+                    door_rep = [[list(point_up), list(point_down)], ["door", 1, 1]]
+                    self.representation["doors"].append(door_rep)
+                self.icons[cc, rr] = 1
+                self.icon_types.append(1)
+                Y, X = self._clip_outside(Y, X)
+                self.icon_coords.append([(x, y) for x, y in zip(X, Y)])
+            if e.getAttribute("id") == "Door":
+                # How to represent empty door space
+                # Same processing as the window branch above, with icon label 2.
+                X, Y = get_points(e)
+                rr, cc = polygon(X, Y)
+                cc, rr = self._clip_outside(cc, rr)
+                direction = get_direction(X, Y)
+                locs = np.column_stack((X, Y))
+                if direction == "H":
+                    left_index = np.argmin(locs[:, 0])
+                    left1 = locs[left_index]
+                    locs = np.delete(locs, left_index, axis=0)
+                    left_index = np.argmin(locs[:, 0])
+                    left2 = locs[left_index]
+                    right = np.delete(locs, left_index, axis=0)
+                    left = np.array([left1, left2])
+                    point_left = left.mean(axis=0)
+                    point_right = right.mean(axis=0)
+                    self.opening_corners["left"].append(left.mean(axis=0))
+                    self.opening_corners["right"].append(right.mean(axis=0))
+                    door_rep = [[list(point_left), list(point_right)], ["door", 1, 1]]
+                    self.representation["doors"].append(door_rep)
+                else:
+                    up_index = np.argmin(locs[:, 1])
+                    up1 = locs[up_index]
+                    locs = np.delete(locs, up_index, axis=0)
+                    up_index = np.argmin(locs[:, 1])
+                    up2 = locs[up_index]
+                    down = np.delete(locs, up_index, axis=0)
+                    up = np.array([up1, up2])
+                    point_up = up.mean(axis=0)
+                    point_down = down.mean(axis=0)
+                    self.opening_corners["up"].append(up.mean(axis=0))
+                    self.opening_corners["down"].append(down.mean(axis=0))
+                    door_rep = [[list(point_up), list(point_down)], ["door", 1, 1]]
+                    self.representation["doors"].append(door_rep)
+                self.icons[cc, rr] = 2
+                self.icon_types.append(2)
+                Y, X = self._clip_outside(Y, X)
+                self.icon_coords.append([(x, y) for x, y in zip(X, Y)])
+            if "FixedFurniture " in e.getAttribute("class"):
+                num = get_icon_number(e, icon_list)
+                if num is not None:
+                    rr, cc, X, Y = get_icon(e)
+                    # only four corner icons
+                    if len(X) == 4:
+                        # Corners are told apart by coordinate sums: the smallest
+                        # x + y is upper-left, the largest among the rest is
+                        # lower-right; of the last two, the smaller y is upper-right.
+                        locs = np.column_stack((X, Y))
+                        up_left_index = locs.sum(axis=1).argmin()
+                        self.icon_corners["upper_left"].append(locs[up_left_index])
+                        up_left = list(locs[up_left_index])
+                        locs = np.delete(locs, up_left_index, axis=0)
+                        down_right_index = locs.sum(axis=1).argmax()
+                        self.icon_corners["lower_right"].append(locs[down_right_index])
+                        down_right = list(locs[down_right_index])
+                        locs = np.delete(locs, down_right_index, axis=0)
+                        up_right_index = locs[:, 1].argmin()
+                        self.icon_corners["upper_right"].append(locs[up_right_index])
+                        locs = np.delete(locs, up_right_index, axis=0)
+                        self.icon_corners["lower_left"].append(locs[0])
+                        # the first class token after "FixedFurniture " is the icon name
+                        icon_name = e.getAttribute("class").replace("FixedFurniture ", "").split(" ")[0]
+                        icon_name = icon_name_map[icon_name]
+                        icon_rep = [[up_left, down_right], [icon_name, 1, 1]]
+                        self.representation["icons"].append(icon_rep)
+                        rr, cc = self._clip_outside(rr, cc)
+                        self.icon_areas.append(len(rr))
+                        self.icons[rr, cc] = num
+                        self.icon_types.append(num)
+                        Y, X = self._clip_outside(Y, X)
+                        self.icon_coords.append([(x, y) for x, y in zip(X, Y)])
+            if "Space " in e.getAttribute("class"):
+                num = get_room_number(e, room_list)
+                # rr, cc = get_polygon(e)
+                X, Y = get_points(e)
+                rr, cc = polygon(Y, X)
+                if len(rr) != 0:
+                    rr, cc = self._clip_outside(rr, cc)
+                    if len(rr) != 0 and len(cc) != 0:
+                        self.walls[rr, cc] = num
+                        self.room_types.append(num)
+                        Y, X = self._clip_outside(Y, X)
+                        self.wall_coords.append([(x, y) for x, y in zip(X, Y)])
+                        # the room label box is a 20x20 square around the mean pixel
+                        rr_mean = int(round(np.mean(rr)))
+                        cc_mean = int(round(np.mean(cc)))
+                        center_box = [[rr_mean - 10, cc_mean - 10], [rr_mean + 10, cc_mean + 10]]
+                        room_name = e.getAttribute("class").replace("Space ", "").split(" ")[0]
+                        room_name = room_name_map[room_name]
+                        self.representation["labels"].append([center_box, [room_name, 1, 1]])
+            # if "Stairs" in e.getAttribute("class"):
+            # for c in e.childNodes:
+            # if c.getAttribute("class") in ["Flight", "Winding"]:
+            # num = room_list["Stairs"]
+            # rr, cc = get_polygon(c)
+            # if len(rr) != 0:
+            # rr, cc = self._clip_outside(rr, cc)
+            # if len(rr) != 0 and len(cc) != 0:
+            # self.walls[rr, cc] = num
+            # self.room_types.append(num)
+            # rr_mean = int(round(np.mean(rr)))
+            # cc_mean = int(round(np.mean(cc)))
+            # center_box = [[rr_mean-10, cc_mean-10], [rr_mean+10, cc_mean+10]]
+            # room_name = "Stairs"
+            # # room_name = room_name_map[room_name]
+            # self.representation['labels'].append([center_box, [room_name, 1, 1]])
+        self.avg_wall_width = self.get_avg_wall_width()
+        # connect_walls also assigns self.pillar_walls, which is read just below
+        self.new_walls = self.connect_walls(self.wall_objs)
+        for w in self.new_walls:
+            w.change_end_points()
+        for w in self.pillar_walls:
+            self.new_walls.append(w)
+        self.points = self.lines_to_points(self.width, self.height, self.new_walls, self.avg_wall_width)
+        self.points = self.merge_joints(self.points, self.avg_wall_width)
+        # walls to representation
+        for w in self.new_walls:
+            end_points = w.end_points.round().astype("int").tolist()
+            if w.name == "Wall":
+                self.representation["walls"].append([end_points, ["wall", 1, 1]])
+            else:
+                self.representation["walls"].append([end_points, ["wall", 2, 1]])
+        # append beginning point at last position
+        print("Complete room coords")
+        self.wall_coords, self.room_types = complete_polygons(self.wall_coords, self.room_types)
+        print("Complete icon coords")
+        self.icon_coords, self.icon_types = complete_polygons(self.icon_coords, self.icon_types)
+    def get_coords_and_labels(self):
+        assert len(self.wall_coords) == len(self.room_types)
+        assert len(self.icon_coords) == len(self.icon_types)
+        return self.wall_coords, self.room_types, self.icon_coords, self.icon_types
+    def get_tensor(self):
+        heatmaps = self.get_heatmaps()
+        wall_t = np.expand_dims(self.walls, axis=0)
+        icon_t = np.expand_dims(self.icons, axis=0)
+        tensor = np.concatenate((heatmaps, wall_t, icon_t), axis=0)
+        return tensor
+    def get_segmentation_tensor(self):
+        wall_t = np.expand_dims(self.walls, axis=0)
+        icon_t = np.expand_dims(self.icons, axis=0)
+        tensor = np.concatenate((wall_t, icon_t), axis=0)
+        return tensor
+    def get_heatmap_dict(self):
+        # init dict
+        heatmaps = {}
+        for i in range(21):
+            heatmaps[i] = []
+        for p in self.points:
+            cord, _, p_type = p
+            x = int(np.round(cord[0]))
+            y = int(np.round(cord[1]))
+            channel = self.get_number(p_type)
+            if y < self.height and x < self.width:
+                heatmaps[channel - 1] = heatmaps[channel - 1] + [(x, y)]
+        channel = 13
+        for i in self.opening_corners["left"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        channel += 1
+        for i in self.opening_corners["right"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        channel += 1
+        for i in self.opening_corners["up"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        channel += 1
+        for i in self.opening_corners["down"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        channel += 1
+        for i in self.icon_corners["upper_left"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        channel += 1
+        for i in self.icon_corners["upper_right"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        channel += 1
+        for i in self.icon_corners["lower_left"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        channel += 1
+        for i in self.icon_corners["lower_right"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel] = heatmaps[channel] + [(x, y)]
+        return heatmaps
+    def get_heatmaps(self):
+        heatmaps = np.zeros((21, self.height, self.width))
+        for p in self.points:
+            cord, _, p_type = p
+            x = int(np.round(cord[0]))
+            y = int(np.round(cord[1]))
+            channel = self.get_number(p_type)
+            if y < self.height and x < self.width:
+                heatmaps[channel - 1, y, x] = 1
+        channel = 13
+        for i in self.opening_corners["left"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        channel += 1
+        for i in self.opening_corners["right"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        channel += 1
+        for i in self.opening_corners["up"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        channel += 1
+        for i in self.opening_corners["down"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        channel += 1
+        for i in self.icon_corners["upper_left"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        channel += 1
+        for i in self.icon_corners["upper_right"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        channel += 1
+        for i in self.icon_corners["lower_left"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        channel += 1
+        for i in self.icon_corners["lower_right"]:
+            y = int(i[1])
+            x = int(i[0])
+            if y < self.height and x < self.width:
+                heatmaps[channel, y, x] = 1
+        kernel = get_gaussian2D(13)
+        for i, h in enumerate(heatmaps):
+            heatmaps[i] = cv2.filter2D(h, -1, kernel)
+        return heatmaps
+    def _clip_outside(self, rr, cc):
+        s = np.column_stack((rr, cc))
+        s = s[s[:, 0] < self.height]
+        s = s[s[:, 1] < self.width]
+        return s[:, 0], s[:, 1]
+    def lines_to_points(self, width, height, walls, lineWidth):
+        """Derive typed junction points from pairs of perpendicular walls.
+
+        Args:
+            width: not used by this method.
+            height: not used by this method.
+            walls: objects exposing ``end_points`` (two end points) and ``max_width``.
+            lineWidth: overwritten for every wall pair before it is read, so the
+                value passed in has no effect.
+
+        Returns:
+            list of ``[point, point, ["point", group, orientation]]`` entries.
+            ``group`` is 2 when two wall ends meet, 3 when one wall end meets
+            the span of the other wall, 4 when the two walls cross, and 1 for a
+            wall end that was not used by any junction.
+        """
+        lines = [h.end_points for h in walls]
+        points = []
+        # one [start_used, end_used] flag pair per wall
+        usedLinePointMask = []
+        for lineIndex, line in enumerate(lines):
+            usedLinePointMask.append([False, False])
+        for lineIndex_1, wall_1 in enumerate(walls):
+            line_1 = wall_1.end_points
+            lineDim_1 = self.get_lineDim(line_1, 1)
+            if lineDim_1 <= -1:
+                # If wall is diagonal we skip
+                continue
+            # coordinate of the wall on the axis it does not extend along
+            fixedValue_1 = (line_1[0][1 - lineDim_1] + line_1[1][1 - lineDim_1]) / 2
+            for lineIndex_2, wall_2 in enumerate(walls):
+                line_2 = wall_2.end_points
+                # each unordered pair of walls is visited once
+                if lineIndex_2 <= lineIndex_1:
+                    continue
+                lineDim_2 = self.get_lineDim(line_2, 1)
+                if lineDim_2 + lineDim_1 != 1:
+                    # if walls have the same direction we skip
+                    # (a diagonal second wall, lineDim -1, fails this test as well)
+                    continue
+                fixedValue_2 = (line_2[0][1 - lineDim_2] + line_2[1][1 - lineDim_2]) / 2
+                # the thicker of the two walls is the distance tolerance
+                lineWidth = max(wall_1.max_width, wall_2.max_width)
+                nearestPair, minDistance = self.findNearestJunctionPair(line_1, line_2, lineWidth)
+                if minDistance <= lineWidth:
+                    pointIndex_1 = nearestPair[0]
+                    pointIndex_2 = nearestPair[1]
+                    if pointIndex_1 > -1 and pointIndex_2 > -1:
+                        # two wall ends meet: group 2, orientation chosen by the
+                        # signs of the offsets towards the walls' other ends
+                        point = [None, None]
+                        point[lineDim_1] = fixedValue_2
+                        point[lineDim_2] = fixedValue_1
+                        side = [None, None]
+                        side[lineDim_1] = line_1[1 - pointIndex_1][lineDim_1] - fixedValue_2
+                        side[lineDim_2] = line_2[1 - pointIndex_2][lineDim_2] - fixedValue_1
+                        # no point is added when either offset is exactly 0
+                        if side[0] < 0 and side[1] < 0:
+                            points.append([point, point, ["point", 2, 1]])
+                        elif side[0] > 0 and side[1] < 0:
+                            points.append([point, point, ["point", 2, 2]])
+                        elif side[0] > 0 and side[1] > 0:
+                            points.append([point, point, ["point", 2, 3]])
+                        elif side[0] < 0 and side[1] > 0:
+                            points.append([point, point, ["point", 2, 4]])
+                        usedLinePointMask[lineIndex_1][pointIndex_1] = True
+                        usedLinePointMask[lineIndex_2][pointIndex_2] = True
+                    elif (pointIndex_1 > -1 and pointIndex_2 == -1) or (pointIndex_1 == -1 and pointIndex_2 > -1):
+                        # one wall end meets the span of the other wall: group 3
+                        if pointIndex_1 > -1:
+                            lineDim = lineDim_1
+                            pointIndex = pointIndex_1
+                            fixedValue = fixedValue_2
+                            pointValue = line_1[pointIndex_1][1 - lineDim_1]
+                            usedLinePointMask[lineIndex_1][pointIndex_1] = True
+                        else:
+                            lineDim = lineDim_2
+                            pointIndex = pointIndex_2
+                            fixedValue = fixedValue_1
+                            pointValue = line_2[pointIndex_2][1 - lineDim_2]
+                            usedLinePointMask[lineIndex_2][pointIndex_2] = True
+                        point = [None, None]
+                        point[lineDim] = fixedValue
+                        point[1 - lineDim] = pointValue
+                        if pointIndex == 0:
+                            if lineDim == 0:
+                                points.append([point, point, ["point", 3, 4]])
+                            else:
+                                points.append([point, point, ["point", 3, 1]])
+                        else:
+                            if lineDim == 0:
+                                points.append([point, point, ["point", 3, 2]])
+                            else:
+                                points.append([point, point, ["point", 3, 3]])
+                elif (
+                    line_1[0][lineDim_1] < fixedValue_2
+                    and line_1[1][lineDim_1] > fixedValue_2
+                    and line_2[0][lineDim_2] < fixedValue_1
+                    and line_2[1][lineDim_2] > fixedValue_1
+                ):
+                    # each wall passes strictly through the other: group 4
+                    point = [None, None]
+                    point[lineDim_1] = fixedValue_2
+                    point[lineDim_2] = fixedValue_1
+                    points.append([point, point, ["point", 4, 1]])
+        # every wall end not consumed by a junction becomes a group-1 point;
+        # ends of diagonal walls (lineDim -1) match neither branch and add nothing
+        for lineIndex, pointMask in enumerate(usedLinePointMask):
+            lineDim = self.get_lineDim(lines[lineIndex], 1)
+            for pointIndex in range(2):
+                if pointMask[pointIndex] is True:
+                    continue
+                point = [lines[lineIndex][pointIndex][0], lines[lineIndex][pointIndex][1]]
+                if pointIndex == 0:
+                    if lineDim == 0:
+                        points.append([point, point, ["point", 1, 4]])
+                    elif lineDim == 1:
+                        points.append([point, point, ["point", 1, 1]])
+                else:
+                    if lineDim == 0:
+                        points.append([point, point, ["point", 1, 2]])
+                    elif lineDim == 1:
+                        points.append([point, point, ["point", 1, 3]])
+        return points
+    def _pointId2index(self, g, t):
+        g_ = g - 1
+        t_ = t - 1
+        k = g_ * 4 + t_
+        return k
+    def _index2pointId(self, k):
+        g = k // 4 + 1
+        t = k % 4 + 1
+        return [g, t]
+    def _are_close(self, p1, p2, width):
+        return calc_distance(p1, p2) < width
+    def merge_joints(self, points, wall_width):
+        """Merge junction points lying closer than ``wall_width`` to each other.
+
+        Args:
+            points: ``[point, point, ["point", group, orientation]]`` entries.
+            wall_width: distance below which two points are pooled together.
+
+        Returns:
+            list of points in the same format. A point with no close neighbour
+            is kept unchanged; pooled points have their types combined pairwise
+            through ``lookuptable`` and keep the location of the pool's first point.
+        """
+        # lookuptable[a][b] is the flat type index (see _pointId2index) that results
+        # from merging types a and b; None means the two types are not combined.
+        lookuptable = {}
+        lookuptable[0] = {0: 0, 1: 7, 2: None, 3: 6, 4: 9, 5: 11, 6: 6, 7: 7, 8: 8, 9: 9, 10: 12, 11: 11, 12: 12}
+        lookuptable[1] = {0: 7, 1: 1, 2: 4, 3: None, 4: 4, 5: 10, 6: 8, 7: 7, 8: 8, 9: 9, 10: 10, 11: 12, 12: 12}
+        lookuptable[2] = {0: None, 1: 4, 2: 2, 3: 5, 4: 4, 5: 5, 6: 11, 7: 9, 8: 12, 9: 9, 10: 10, 11: 11, 12: 12}
+        lookuptable[3] = {0: 6, 1: None, 2: 5, 3: 3, 4: 10, 5: 5, 6: 6, 7: 8, 8: 8, 9: 12, 10: 10, 11: 11, 12: 12}
+        lookuptable[4] = {0: 9, 1: 4, 2: 4, 3: 10, 4: 4, 5: 10, 6: 12, 7: 9, 8: 12, 9: 9, 10: 10, 11: 12, 12: 12}
+        lookuptable[5] = {0: 11, 1: 10, 2: 5, 3: 5, 4: 10, 5: 5, 6: 11, 7: 12, 8: 12, 9: 12, 10: 10, 11: 11, 12: 12}
+        lookuptable[6] = {0: 6, 1: 8, 2: 11, 3: 6, 4: 12, 5: 11, 6: 6, 7: 8, 8: 8, 9: 12, 10: 12, 11: 11, 12: 12}
+        lookuptable[7] = {0: 7, 1: 7, 2: 9, 3: 8, 4: 9, 5: 12, 6: 8, 7: 7, 8: 8, 9: 9, 10: 12, 11: 12, 12: 12}
+        lookuptable[8] = {0: 8, 1: 8, 2: 12, 3: 8, 4: 12, 5: 12, 6: 8, 7: 8, 8: 8, 9: 12, 10: 12, 11: 12, 12: 12}
+        lookuptable[9] = {0: 9, 1: 9, 2: 9, 3: 12, 4: 9, 5: 12, 6: 12, 7: 9, 8: 12, 9: 9, 10: 12, 11: 12, 12: 12}
+        lookuptable[10] = {
+            0: 12,
+            1: 10,
+            2: 10,
+            3: 10,
+            4: 10,
+            5: 10,
+            6: 12,
+            7: 12,
+            8: 12,
+            9: 12,
+            10: 10,
+            11: 12,
+            12: 12,
+        }
+        lookuptable[11] = {
+            0: 11,
+            1: 12,
+            2: 11,
+            3: 11,
+            4: 12,
+            5: 11,
+            6: 11,
+            7: 12,
+            8: 12,
+            9: 12,
+            10: 12,
+            11: 11,
+            12: 12,
+        }
+        lookuptable[12] = {
+            0: 12,
+            1: 12,
+            2: 12,
+            3: 12,
+            4: 12,
+            5: 12,
+            6: 12,
+            7: 12,
+            8: 12,
+            9: 12,
+            10: 12,
+            11: 12,
+            12: 12,
+        }
+        newPoints = []
+        merged = [False] * len(points)
+        for i, point1 in enumerate(points):
+            if merged[i] is False:
+                # pool = point1 plus every not-yet-merged point close to it
+                pool = [point1]
+                for j, point2 in enumerate(points):
+                    if j != i and merged[j] is False and self._are_close(point1[0], point2[0], wall_width):
+                        merged[j] = True
+                        pool.append(point2)
+                if len(pool) == 1:
+                    newPoints.append(point1)
+                    merged[i] = True
+                else:
+                    # NOTE(review): merged[i] is not set on this path, so point1 can
+                    # still be pooled by a later point -- confirm this is intended.
+                    p_ = pool[0]
+                    for point_id in range(1, len(pool)):
+                        merge_to_p = pool[point_id]
+                        k_ = self._pointId2index(p_[2][1], p_[2][2])
+                        k_merge_to_p = self._pointId2index(merge_to_p[2][1], merge_to_p[2][2])
+                        knew = lookuptable[k_][k_merge_to_p]
+                        if knew is None:
+                            continue
+                        typenew = self._index2pointId(knew)
+                        p_ = [p_[0], p_[1], ["point", typenew[0], typenew[1]]]
+                        # NOTE(review): the running result is appended after every
+                        # successful combination, so a pool of k points can add up
+                        # to k-1 entries (and none when every lookup is None) --
+                        # confirm a single append after the loop was not intended.
+                        newPoints.append(p_)
+        return newPoints
+    def get_avg_wall_width(self):
+        res = 0
+        for i, w in enumerate(self.wall_objs):
+            res += w.max_width
+        res = res / float(i)
+        return res
+    def connect_walls(self, walls):
+        """Merge mergeable wall segments and split stand-alone pillars.
+
+        Wall ids are expected to be exactly ``1..len(walls)``; ``find_wall_by_id``
+        returns None for a missing id and the attribute access on it would fail.
+
+        Args:
+            walls: wall objects exposing ``id``, ``wall_is_pillar``,
+                ``merge_possible``, ``merge_walls`` and ``split_pillar_wall``.
+
+        Returns:
+            list of walls after merging. The walls produced from pillars are not
+            returned; they are stored in ``self.pillar_walls``.
+        """
+        new_walls = []
+        num_walls = len(walls)
+        remaining_walls = list(range(1, num_walls + 1))
+        # getting pillars
+        remaining_pillar_ids = []
+        for p_id in range(1, num_walls + 1):
+            p_wall = self.find_wall_by_id(p_id, walls)
+            if p_wall.wall_is_pillar(self.avg_wall_width):
+                # NOTE(review): this scan includes p_wall itself -- presumably
+                # merge_possible() is False for a wall and itself; confirm.
+                for wall_id in range(1, num_walls + 1):
+                    wall = self.find_wall_by_id(wall_id, walls)
+                    if p_wall.merge_possible(wall):
+                        break
+                else:
+                    # for-else: no wall can be merged with this pillar, so it is
+                    # taken out of the merge candidates and handled separately
+                    remaining_walls.pop(remaining_walls.index(p_wall.id))
+                    remaining_pillar_ids.append(p_wall.id)
+        while len(remaining_walls) > 0:
+            new_wall_id = remaining_walls.pop(0)
+            new_wall = self.find_wall_by_id(new_wall_id, walls)
+            # keep sweeping the remaining walls until a full pass merges nothing;
+            # remaining_walls is mutated while it is iterated, so one pass may
+            # skip entries and the repeated sweeps pick those up
+            found = True
+            while found:
+                found = False
+                for merge_wall_id in remaining_walls:
+                    merged = self.find_wall_by_id(merge_wall_id, walls)
+                    temp_wall = new_wall.merge_walls(merged)
+                    if temp_wall is not None:
+                        remaining_walls.pop(remaining_walls.index(merged.id))
+                        new_wall = temp_wall
+                        found = True
+            new_walls.append(new_wall)
+        # connect pillars to walls
+        new_wall_id = num_walls + 1
+        self.pillar_walls = []
+        for id in remaining_pillar_ids:
+            w = self.find_wall_by_id(id, walls)
+            pws = w.split_pillar_wall(new_wall_id, self.avg_wall_width)
+            # four ids are reserved per pillar -- presumably split_pillar_wall
+            # creates four walls; TODO confirm
+            new_wall_id += 4
+            for pw in pws:
+                self.pillar_walls.append(pw)
+        return new_walls
+    def get_number(self, x):
+        return (x[1] - 1) * 4 + x[2]
+    def get_lineDim(self, line, lineWidth):
+        lineWidth = lineWidth or 1
+        if abs(line[0][0] - line[1][0]) > abs(line[0][1] - line[1][1]) and abs(line[0][1] - line[1][1]) <= lineWidth:
+            return 0
+        elif abs(line[0][1] - line[1][1]) > abs(line[0][0] - line[1][0]) and abs(line[0][0] - line[1][0]) <= lineWidth:
+            return 1
+        else:
+            return -1
+    def findNearestJunctionPair(self, line_1, line_2, gap):
+        """Find which ends of two lines are closest to each other.
+
+        Returns ``(nearestPair, minDistance)`` where ``nearestPair`` is
+        ``[index_1, index_2]``: the end (0 or 1) of ``line_1`` and of
+        ``line_2`` with the smallest ``calc_distance``.  When that distance
+        exceeds ``gap`` and the lines are axis-aligned and perpendicular, a
+        closer end-to-line match may replace it; the entry for the line that
+        is matched along its length instead of at an end is then ``-1``.
+        """
+        minDistance = None
+        # Pass 1: brute-force the four end-to-end combinations.
+        for index_1 in range(0, 2):
+            for index_2 in range(0, 2):
+                distance = calc_distance(line_1[index_1], line_2[index_2])
+                if minDistance is None or distance < minDistance:
+                    nearestPair = [index_1, index_2]
+                    minDistance = distance
+        # Pass 2: only if no end pair is within `gap`, look for a T-style match.
+        if minDistance > gap:
+            lineDim_1 = self.get_lineDim(line_1, 1)
+            lineDim_2 = self.get_lineDim(line_2, 1)
+            # get_lineDim returns -1, 0 or 1, so the sum is 1 only for one
+            # x-aligned and one y-aligned line.
+            if lineDim_1 + lineDim_2 == 1:
+                # Mean position of each line on the axis it does not run along.
+                fixedValue_1 = (line_1[0][1 - lineDim_1] + line_1[1][1 - lineDim_1]) / 2
+                fixedValue_2 = (line_2[0][1 - lineDim_2] + line_2[1][1 - lineDim_2]) / 2
+                # line_1's fixed coordinate lies strictly inside line_2's span
+                # (assumes line_2[0] precedes line_2[1] along its axis -- TODO confirm).
+                if line_2[0][lineDim_2] < fixedValue_1 and line_2[1][lineDim_2] > fixedValue_1:
+                    for index in range(2):
+                        distance = abs(line_1[index][lineDim_1] - fixedValue_2)
+                        if distance < minDistance:
+                            nearestPair = [index, -1]
+                            minDistance = distance
+                # Symmetric case: an end of line_2 against the length of line_1.
+                if line_1[0][lineDim_1] < fixedValue_2 and line_1[1][lineDim_1] > fixedValue_2:
+                    for index in range(2):
+                        distance = abs(line_2[index][lineDim_2] - fixedValue_1)
+                        if distance < minDistance:
+                            nearestPair = [-1, index]
+                            minDistance = distance
+        return nearestPair, minDistance
+    def find_wall_by_id(self, id, walls):
+        for wall in walls:
+            if wall.id == id:
+                return wall
+        return None

data_preprocess/cubicasa5k/loaders.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# import lmdb
+import pickle
+import cv2
+import numpy as np
+import torch
+from house import House
+from numpy import genfromtxt
+from torch.utils.data import Dataset
+# Integer id -> display name for the room/structure classes (12 entries, ids 0-11).
+# NOTE(review): presumably these ids are the class values of the room
+# segmentation label -- confirm against House.
+ROOM_NAMES = {
+    0: "Background",
+    1: "Outdoor",
+    2: "Wall",
+    3: "Kitchen",
+    4: "Living Room",
+    5: "Bed Room",
+    6: "Bath",
+    7: "Entry",
+    8: "Railing",
+    9: "Storage",
+    10: "Garage",
+    11: "Undefined",
+}
+# Integer id -> display name for the icon classes (11 entries, ids 0-10).
+# NOTE(review): "Applience" is a misspelling of "Appliance"; the string is left
+# untouched here because other code may match on it verbatim.
+ICON_NAMES = {
+    0: "No Icon",
+    1: "Window",
+    2: "Door",
+    3: "Closet",
+    4: "Electrical Applience",
+    5: "Toilet",
+    6: "Sink",
+    7: "Sauna Bench",
+    8: "Fire Place",
+    9: "Bathtub",
+    10: "Chimney",
+}
+class FloorplanSVG(Dataset):
+    def __init__(
+        self,
+        data_folder,
+        data_file,
+        is_transform=True,
+        augmentations=None,
+        img_norm=True,
+        format="txt",
+        original_size=False,
+        lmdb_folder="cubi_lmdb/",
+    ):
+        self.img_norm = img_norm
+        self.is_transform = is_transform
+        self.augmentations = augmentations
+        self.get_data = None
+        self.original_size = original_size
+        self.image_file_name = "/F1_scaled.png"
+        self.org_image_file_name = "/F1_original.png"
+        self.svg_file_name = "/model.svg"
+        if format == "txt":
+            self.get_data = self.get_txt
+        # if format == 'lmdb':
+        #     self.lmdb = lmdb.open(data_folder+lmdb_folder, readonly=True,
+        #                           max_readers=8, lock=False,
+        #                           readahead=True, meminit=False)
+        #     self.get_data = self.get_lmdb
+        #     self.is_transform = False
+        self.data_folder = data_folder
+        # Load txt file to list
+        self.folders = genfromtxt(data_folder + data_file, dtype="str")
+    def __len__(self):
+        """__len__"""
+        return len(self.folders)
+    def __getitem__(self, index):
+        sample = self.get_data(index)
+        if self.augmentations is not None:
+            sample = self.augmentations(sample)
+        if self.is_transform:
+            sample = self.transform(sample)
+        return sample
+    def get_txt(self, index):
+        fplan = cv2.imread(self.data_folder + self.folders[index] + self.image_file_name)
+        fplan = cv2.cvtColor(fplan, cv2.COLOR_BGR2RGB)  # correct color channels
+        height, width, nchannel = fplan.shape
+        fplan = np.moveaxis(fplan, -1, 0)
+        # Getting labels for segmentation and heatmaps
+        house = House(self.data_folder + self.folders[index] + self.svg_file_name, height, width)
+        # Combining them to one numpy tensor
+        label = torch.tensor(house.get_segmentation_tensor().astype(np.float32))
+        heatmaps = house.get_heatmap_dict()
+        room_polygons, room_types, icon_polygons, icon_types = house.get_coords_and_labels()
+        coef_width = 1
+        if self.original_size:
+            fplan = cv2.imread(self.data_folder + self.folders[index] + self.org_image_file_name)
+            fplan = cv2.cvtColor(fplan, cv2.COLOR_BGR2RGB)  # correct color channels
+            height_org, width_org, nchannel = fplan.shape
+            fplan = np.moveaxis(fplan, -1, 0)
+            label = label.unsqueeze(0)
+            label = torch.nn.functional.interpolate(label, size=(height_org, width_org), mode="nearest")
+            label = label.squeeze(0)
+            coef_height = float(height_org) / float(height)
+            coef_width = float(width_org) / float(width)
+            for key, value in heatmaps.items():
+                heatmaps[key] = [(int(round(x * coef_width)), int(round(y * coef_height))) for x, y in value]
+            new_room_polygons = []
+            for poly in room_polygons:
+                new_room_polygons.append([(int(round(x * coef_width)), int(round(y * coef_height))) for x, y in poly])
+            room_polygons = new_room_polygons
+            new_icon_polygons = []
+            for poly in icon_polygons:
+                new_icon_polygons.append([(int(round(x * coef_width)), int(round(y * coef_height))) for x, y in poly])
+            icon_polygons = new_icon_polygons
+        img = torch.tensor(fplan.astype(np.float32))
+        sample = {
+            "image": img,
+            "label": label,
+            "folder": self.folders[index],
+            "heatmaps": heatmaps,
+            "scale": coef_width,
+            "room_polygon": room_polygons,
+            "room_type": room_types,
+            "icon_polygon": icon_polygons,
+            "icon_type": icon_types,
+        }
+        return sample
+    def get_lmdb(self, index):
+        key = self.folders[index].encode()
+        with self.lmdb.begin(write=False) as f:
+            data = f.get(key)
+        sample = pickle.loads(data)
+        return sample
+    def transform(self, sample):
+        fplan = sample["image"]
+        # Normalization values to range -1 and 1
+        fplan = 2 * (fplan / 255.0) - 1
+        sample["image"] = fplan
+        return sample

data_preprocess/cubicasa5k/plotting.py ADDED Viewed

	@@ -0,0 +1,820 @@

+import matplotlib.path as mplp
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import cm, colors
+from shapely.geometry import Point, Polygon
+from skimage import draw
+def discrete_cmap_furukawa():
+    """create a colormap with N (N<15) discrete colors and register it"""
+    # define individual colors as hex values
+    cpool = [
+        "#696969",
+        "#b3de69",
+        "#ffffb3",
+        "#8dd3c7",
+        "#fdb462",
+        "#fccde5",
+        "#80b1d3",
+        "#d9d9d9",
+        "#fb8072",
+        "#577a4d",
+        "white",
+        "#000000",
+        "#e31a1c",
+    ]
+    cmap3 = colors.ListedColormap(cpool, "rooms_furukawa")
+    cm.register_cmap(cmap=cmap3)
+    cpool = [
+        "#ede676",
+        "#8dd3c7",
+        "#b15928",
+        "#fdb462",
+        "#ffff99",
+        "#fccde5",
+        "#80b1d3",
+        "#d9d9d9",
+        "#fb8072",
+        "#696969",
+        "#577a4d",
+        "#e31a1c",
+        "#42ef59",
+        "#8c595a",
+        "#3131e5",
+        "#48e0e6",
+        "white",
+    ]
+    cmap3 = colors.ListedColormap(cpool, "icons_furukawa")
+    cm.register_cmap(cmap=cmap3)
+def drawJunction(h, point, point_type, width, height):
+    lineLength = 15
+    lineWidth = 10
+    x, y = point
+    # plt.text(x,y,str(index),fontsize=25,color='r')
+    if point_type == -1:
+        h.scatter(x, y, color="#6488ea")
+    ###########################
+    # o
+    # | #6488ea soft blue
+    # | drawcode = [1,1]
+    #
+    ###########################
+    if point_type == 0:
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#6488ea")
+        # plt.scatter(x, y-10, c='k')
+    ###########################
+    #
+    #  ---o #6241c7 bluey purple
+    #     drawcode = [1,2]
+    #
+    ###########################
+    elif point_type == 1:
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#6241c7")
+        # plt.scatter(x+10, y, c='k')
+    ###########################
+    #    |
+    #    | drawcode = [1,3]
+    #    o #056eee cerulean blue
+    #
+    ###########################
+    elif point_type == 2:
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#056eee")
+        # plt.scatter(x, y+10, c='k')
+    ###########################
+    #
+    #  drawcode = [1,4]
+    #
+    #  o--- #004577 prussian blue
+    #
+    ###########################
+    elif point_type == 3:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#004577")
+        # plt.scatter(x-10, y, c='k')
+    ###########################
+    #
+    # |--- drawcode = [2,3]
+    # |
+    #
+    ###########################
+    elif point_type == 6:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#04d8b2")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#04d8b2")
+    ###########################
+    #
+    #  ---|
+    #     | drawcode = [2,4]
+    #
+    ###########################
+    elif point_type == 7:
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#cdfd02")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#cdfd02")
+    ###########################
+    #    |
+    # ---| drawcode = [2,1]
+    #
+    #
+    ###########################
+    elif point_type == 4:
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#ff81c0")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#ff81c0")
+    ###########################
+    #
+    #  |
+    #  | drawcode = [2,2]
+    #  --
+    #
+    ###########################
+    elif point_type == 5:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#f97306")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#f97306")
+    ###########################
+    #
+    # |
+    # |--- drawcode = [3,4]
+    # |
+    #
+    ###########################
+    elif point_type == 11:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="b")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="b")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="b")
+    ###########################
+    #
+    # ---
+    #  |  drawcode = [3,1]
+    #  |
+    #
+    ###########################
+    elif point_type == 8:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="y")
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="y")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="y")
+    ###########################
+    #
+    #    |
+    # ---| drawcode = [3,2]
+    #    |
+    #
+    ###########################
+    elif point_type == 9:
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="r")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="r")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="r")
+    ###########################
+    #
+    #  |
+    #  | drawcode = [3,3]
+    # ---
+    #
+    ###########################
+    elif point_type == 10:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="m")
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="m")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="m")
+    ###########################
+    #
+    #  |
+    # --- drawcode = [4,1]
+    #  |
+    #
+    ###########################
+    elif point_type == 12:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="k")
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="k")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="k")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="k")
+    lineLength = 10
+    lineWidth = 5
+    ###########################
+    # o--- opening left
+    ###########################
+    if point_type == 13:
+        h.plot([x], [y], "o", markersize=30, color="red")
+        h.plot([x], [y], "o", markersize=25, color="white")
+        h.text(x, y, "OL", fontsize=30, color="magenta")
+    ###########################
+    # ---o opening right
+    ###########################
+    elif point_type == 14:
+        h.plot([x], [y], "o", markersize=30, color="red")
+        h.plot([x], [y], "o", markersize=25, color="white")
+        h.text(x, y, "OR", fontsize=30, color="magenta")
+    ###########################
+    # o opening up
+    # |
+    # |
+    ###########################
+    elif point_type == 15:
+        h.plot([x], [y], "o", markersize=30, color="red")
+        h.plot([x], [y], "o", markersize=25, color="white")
+        h.text(x, y, "OU", fontsize=30, color="mediumblue")
+    ###########################
+    # | opening down
+    # |
+    # o
+    ###########################
+    elif point_type == 16:
+        h.plot([x], [y], "o", markersize=30, color="red")
+        h.plot([x], [y], "o", markersize=25, color="white")
+        h.text(x, y, "OD", fontsize=30, color="mediumblue")
+    ###########################
+    #
+    # |--- drawcode = [2,3]
+    # |
+    #
+    ###########################
+    elif point_type == 17:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="indianred")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="indianred")
+    ###########################
+    #
+    #  ---|
+    #     | drawcode = [2,4]
+    #
+    ###########################
+    elif point_type == 18:
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="darkred")
+        h.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="darkred")
+    ###########################
+    #
+    #  |
+    #  | drawcode = [2,2]
+    #  --
+    #
+    ###########################
+    elif point_type == 19:
+        h.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="salmon")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="salmon")
+    ###########################
+    #    |
+    # ---| drawcode = [2,1]
+    #
+    #
+    ###########################
+    elif point_type == 20:
+        h.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="orangered")
+        h.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="orangered")
+def draw_junction_from_dict(point_dict, width, height, size=1, fontsize=30):
+    index = 0
+    markersize_large = 20 * size
+    markersize_small = 15 * size
+    for point_type, locations in point_dict.items():
+        for loc in locations:
+            x, y = loc
+            lineLength = 20 * size
+            lineWidth = 20 * size
+            # plt.text(x,y,str(index),fontsize=25,color='r')
+            ###########################
+            # o
+            # | #6488ea soft blue
+            # | drawcode = [1,1]
+            #
+            ###########################
+            if point_type == 0:
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#6488ea")
+                # plt.scatter(x, y-10, c='k')
+            ###########################
+            #
+            #  ---o #6241c7 bluey purple
+            #     drawcode = [1,2]
+            #
+            ###########################
+            elif point_type == 1:
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#6241c7")
+                # plt.scatter(x+10, y, c='k')
+            ###########################
+            #    |
+            #    | drawcode = [1,3]
+            #    o #056eee cerulean blue
+            #
+            ###########################
+            elif point_type == 2:
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#056eee")
+                # plt.scatter(x, y+10, c='k')
+            ###########################
+            #
+            #  drawcode = [1,4]
+            #
+            #  o--- #004577 prussian blue
+            #
+            ###########################
+            elif point_type == 3:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#004577")
+                # plt.scatter(x-10, y, c='k')
+            ###########################
+            #
+            # |--- drawcode = [2,3]
+            # |
+            #
+            ###########################
+            elif point_type == 6:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#04d8b2")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#04d8b2")
+            ###########################
+            #
+            #  ---|
+            #     | drawcode = [2,4]
+            #
+            ###########################
+            elif point_type == 7:
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#cdfd02")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#cdfd02")
+            ###########################
+            #    |
+            # ---| drawcode = [2,1]
+            #
+            #
+            ###########################
+            elif point_type == 4:
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#ff81c0")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#ff81c0")
+            ###########################
+            #
+            #  |
+            #  | drawcode = [2,2]
+            #  --
+            #
+            ###########################
+            elif point_type == 5:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#f97306")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#f97306")
+            ###########################
+            #
+            # |
+            # |--- drawcode = [3,4]
+            # |
+            #
+            ###########################
+            elif point_type == 11:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="b")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="b")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="b")
+            ###########################
+            #
+            # ---
+            #  |  drawcode = [3,1]
+            #  |
+            #
+            ###########################
+            elif point_type == 8:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="y")
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="y")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="y")
+            ###########################
+            #
+            #    |
+            # ---| drawcode = [3,2]
+            #    |
+            #
+            ###########################
+            elif point_type == 9:
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="r")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="r")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="r")
+            ###########################
+            #
+            #  |
+            #  | drawcode = [3,3]
+            # ---
+            #
+            ###########################
+            elif point_type == 10:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="m")
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="m")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="m")
+            ###########################
+            #
+            #  |
+            # --- drawcode = [4,1]
+            #  |
+            #
+            ###########################
+            elif point_type == 12:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="k")
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="k")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="k")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="k")
+            lineLength = 15 * size
+            lineWidth = 15 * size
+            ###########################
+            # o--- opening left
+            ###########################
+            if point_type == 13:
+                plt.plot([x], [y], "o", markersize=markersize_large, color="red")
+                plt.plot([x], [y], "o", markersize=markersize_small, color="white")
+                plt.text(x, y, "OL", fontsize=fontsize, color="magenta")
+            ###########################
+            # ---o opening right
+            ###########################
+            elif point_type == 14:
+                plt.plot([x], [y], "o", markersize=markersize_large, color="red")
+                plt.plot([x], [y], "o", markersize=markersize_small, color="white")
+                plt.text(x, y, "OR", fontsize=fontsize, color="magenta")
+            ###########################
+            # o opening up
+            # |
+            # |
+            ###########################
+            elif point_type == 15:
+                plt.plot([x], [y], "o", markersize=markersize_large, color="red")
+                plt.plot([x], [y], "o", markersize=markersize_small, color="white")
+                plt.text(x, y, "OU", fontsize=fontsize, color="mediumblue")
+            ###########################
+            # | opening down
+            # |
+            # o
+            ###########################
+            elif point_type == 16:
+                plt.plot([x], [y], "o", markersize=markersize_large, color="red")
+                plt.plot([x], [y], "o", markersize=markersize_small, color="white")
+                plt.text(x, y, "OD", fontsize=fontsize, color="mediumblue")
+            ###########################
+            #
+            # |--- drawcode = [2,3]
+            # |
+            #
+            ###########################
+            elif point_type == 17:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="indianred")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="indianred")
+            ###########################
+            #
+            #  ---|
+            #     | drawcode = [2,4]
+            #
+            ###########################
+            elif point_type == 18:
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="darkred")
+                plt.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="darkred")
+            ###########################
+            #
+            #  |
+            #  | drawcode = [2,2]
+            #  --
+            #
+            ###########################
+            elif point_type == 19:
+                plt.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="salmon")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="salmon")
+            ###########################
+            #    |
+            # ---| drawcode = [2,1]
+            #
+            #
+            ###########################
+            elif point_type == 20:
+                plt.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="orangered")
+                plt.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="orangered")
+            index += 1
+def plot_pre_rec_4(instances, classes):
+    walls = ["Wall", "Railing"]
+    openings = ["Window", "Door"]
+    rooms = [
+        "Outdoor",
+        "Kitchen",
+        "Living Room",
+        "Bed Room",
+        "Entry",
+        "Dining",
+        "Storage",
+        "Garage",
+        "Undefined Room",
+        "Sauna",
+        "Fire Place",
+        "Bathtub",
+        "Chimney",
+    ]
+    icons = [
+        "Bath",
+        "Closet",
+        "Electrical Appliance",
+        "Toilet",
+        "Shower",
+        "Sink",
+        "Sauna",
+        "Fire Place",
+        "Bathtub",
+        "Chimney",
+    ]
+    def make_sub_plot(classes_to_plot):
+        plt.ylim([0.0, 1.0])
+        plt.xlim([0.0, 1.0])
+        plt.xlabel("Recall")
+        plt.ylabel("Precision")
+        indx = [classes.index(i) for i in classes_to_plot]
+        ins = instances[:, indx].sum(axis=1)
+        correct = ins[:, 0]
+        false_positive = ins[:, 2]
+        false_negatives = ins[:, 1]
+        precision = correct / (correct + false_positive)
+        recall = correct / (correct + false_negatives)
+        plt.step(recall[::-1], precision, color="b", alpha=0.2, where="post")
+        plt.fill_between(recall[::-1], precision, step="post", alpha=0.2, color="b")
+    plt.subplot(2, 2, 1)
+    plt.title("Walls")
+    make_sub_plot(walls)
+    plt.subplot(2, 2, 2)
+    plt.title("Openings")
+    make_sub_plot(openings)
+    plt.subplot(2, 2, 3)
+    plt.title("Rooms")
+    make_sub_plot(rooms)
+    plt.subplot(2, 2, 4)
+    plt.title("Icons")
+    make_sub_plot(icons)
+def discrete_cmap():
+    """create a colormap with N (N<15) discrete colors and register it"""
+    # define individual colors as hex values
+    cpool = [
+        "#DCDCDC",
+        "#b3de69",
+        "#000000",
+        "#8dd3c7",
+        "#fdb462",
+        "#fccde5",
+        "#80b1d3",
+        "#808080",
+        "#fb8072",
+        "#696969",
+        "#577a4d",
+        "#ffffb3",
+    ]
+    cmap3 = colors.ListedColormap(cpool, "rooms")
+    cm.register_cmap(cmap=cmap3)
+    cpool = [
+        "#DCDCDC",
+        "#8dd3c7",
+        "#b15928",
+        "#fdb462",
+        "#ffff99",
+        "#fccde5",
+        "#80b1d3",
+        "#808080",
+        "#fb8072",
+        "#696969",
+        "#577a4d",
+    ]
+    cmap3 = colors.ListedColormap(cpool, "icons")
+    cm.register_cmap(cmap=cmap3)
+    """create a colormap with N (N<15) discrete colors and register it"""
+    # define individual colors as hex values
+    cpool = [
+        "#DCDCDC",
+        "#b3de69",
+        "#000000",
+        "#8dd3c7",
+        "#fdb462",
+        "#fccde5",
+        "#80b1d3",
+        "#808080",
+        "#fb8072",
+        "#696969",
+        "#577a4d",
+        "#ffffb3",
+        "d3d5d7",
+    ]
+    cmap3 = colors.ListedColormap(cpool, "rooms_furu")
+    cm.register_cmap(cmap=cmap3)
+    cpool = [
+        "#DCDCDC",
+        "#8dd3c7",
+        "#b15928",
+        "#fdb462",
+        "#ffff99",
+        "#fccde5",
+        "#80b1d3",
+        "#808080",
+        "#fb8072",
+        "#696969",
+        "#577a4d",
+    ]
+    cmap3 = colors.ListedColormap(cpool, "rooms_furu")
+    cm.register_cmap(cmap=cmap3)
+def segmentation_plot(rooms_pred, icons_pred, rooms_label, icons_label):
+    room_classes = [
+        "Background",
+        "Outdoor",
+        "Wall",
+        "Kitchen",
+        "Living Room",
+        "Bed Room",
+        "Bath",
+        "Entry",
+        "Railing",
+        "Storage",
+        "Garage",
+        "Undefined",
+    ]
+    icon_classes = [
+        "No Icon",
+        "Window",
+        "Door",
+        "Closet",
+        "Electrical Applience",
+        "Toilet",
+        "Sink",
+        "Sauna Bench",
+        "Fire Place",
+        "Bathtub",
+        "Chimney",
+    ]
+    discrete_cmap()  # custom colormap
+    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(30, 15))
+    axes[0].set_title("Room Ground Truth")
+    axes[0].imshow(rooms_label, cmap="rooms", vmin=0, vmax=len(room_classes) - 1)
+    axes[1].set_title("Room Prediction")
+    im = axes[1].imshow(rooms_pred, cmap="rooms", vmin=0, vmax=len(room_classes) - 1)
+    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
+    cbar = fig.colorbar(im, cax=cbar_ax, ticks=np.arange(12) + 0.5)
+    fig.subplots_adjust(right=0.8)
+    cbar.ax.set_yticklabels(room_classes)
+    plt.show()
+    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(30, 15))
+    axes[0].set_title("Icon Ground Truth")
+    axes[0].imshow(icons_label, cmap="icons", vmin=0, vmax=len(icon_classes) - 1)
+    axes[1].set_title("Icon Prediction")
+    im = axes[1].imshow(icons_pred, cmap="icons", vmin=0, vmax=len(icon_classes) - 1)
+    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
+    cbar = fig.colorbar(im, cax=cbar_ax, ticks=np.arange(11) + 0.5)
+    fig.subplots_adjust(right=0.8)
+    cbar.ax.set_yticklabels(icon_classes)
+    plt.show()
+def polygons_to_image(polygons, types, room_polygons, room_types, height, width):
+    pol_room_seg = np.zeros((height, width))
+    pol_icon_seg = np.zeros((height, width))
+    for i, pol in enumerate(room_polygons):
+        mask = shp_mask(pol, np.arange(width), np.arange(height))
+        #         jj, ii = draw.polygon(pol[:, 1], pol[:, 0])
+        pol_room_seg[mask] = room_types[i]["class"]
+    for i, pol in enumerate(polygons):
+        jj, ii = draw.polygon(pol[:, 1], pol[:, 0])
+        if types[i]["type"] == "wall":
+            pol_room_seg[jj, ii] = types[i]["class"]
+        else:
+            pol_icon_seg[jj, ii] = types[i]["class"]
+    return pol_room_seg, pol_icon_seg
+def plot_room(r, name, n_classes=12):
+    discrete_cmap()  # custom colormap
+    plt.figure(figsize=(40, 30))
+    plt.axis("off")
+    plt.tight_layout()
+    plt.imshow(r, cmap="rooms", vmin=0, vmax=n_classes - 1)
+    plt.savefig(name + ".png", format="png")
+    plt.show()
+def plot_icon(i, name, n_classes=11):
+    discrete_cmap()  # custom colormap
+    plt.figure(figsize=(40, 30))
+    plt.axis("off")
+    plt.tight_layout()
+    plt.imshow(i, cmap="icons", vmin=0, vmax=n_classes - 1)
+    plt.savefig(name + ".png", format="png")
+    plt.show()
+def plot_heatmaps(h, name):
+    for index, i in enumerate(h):
+        plt.figure(figsize=(40, 30))
+        plt.axis("off")
+        plt.tight_layout()
+        plt.imshow(i, cmap="Reds", vmin=0, vmax=1)
+        plt.savefig(name + str(index) + ".png", format="png")
+        plt.show()
+def outline_to_mask(line, x, y):
+    """Create mask from outline contour
+    Parameters
+    ----------
+    line: array-like (N, 2)
+    x, y: 1-D grid coordinates (input for meshgrid)
+    Returns
+    -------
+    mask : 2-D boolean array (True inside)
+    Examples
+    --------
+    >>> from shapely.geometry import Point
+    >>> poly = Point(0,0).buffer(1)
+    >>> x = np.linspace(-5,5,100)
+    >>> y = np.linspace(-5,5,100)
+    >>> mask = outline_to_mask(poly.boundary, x, y)
+    """
+    mpath = mplp.Path(line)
+    X, Y = np.meshgrid(x, y)
+    points = np.array((X.flatten(), Y.flatten())).T
+    mask = mpath.contains_points(points).reshape(X.shape)
+    return mask
+def _grid_bbox(x, y):
+    dx = dy = 0
+    return x[0] - dx / 2, x[-1] + dx / 2, y[0] - dy / 2, y[-1] + dy / 2
+def _bbox_to_rect(bbox):
+    l, r, b, t = bbox
+    return Polygon([(l, b), (r, b), (r, t), (l, t)])
+def shp_mask(shp, x, y, m=None):
+    """
+    Adapted from code written by perrette
+    from: https://gist.github.com/perrette/a78f99b76aed54b6babf3597e0b331f8
+    Use recursive sub-division of space and shapely contains method to create a raster mask on a regular grid.
+    Parameters
+    ----------
+    shp : shapely's Polygon (or whatever with a "contains" method and intersects method)
+    x, y : 1-D numpy arrays defining a regular grid
+    m : mask to fill, optional (will be created otherwise)
+    Returns
+    -------
+    m : boolean 2-D array of shape (y.size, x.size), True inside shape.
+    Examples
+    --------
+    >>> from shapely.geometry import Point
+    >>> poly = Point(0,0).buffer(1)
+    >>> x = np.linspace(-5,5,100)
+    >>> y = np.linspace(-5,5,100)
+    >>> mask = shp_mask(poly, x, y)
+    """
+    # Rectangle through the outermost grid points of the current window.
+    rect = _bbox_to_rect(_grid_bbox(x, y))
+    if m is None:
+        m = np.zeros((y.size, x.size), dtype=bool)
+    if not shp.intersects(rect):
+        # Window entirely outside the shape.
+        m[:] = False
+    elif shp.contains(rect):
+        # Window entirely inside the shape.
+        m[:] = True
+    else:
+        # Partial overlap: split the window and recurse.  The slices of m
+        # passed down are views, so each call fills its part in place.
+        k, l = m.shape
+        if k == 1 and l == 1:
+            # Single grid point: test it directly.
+            m[:] = shp.contains(Point(x[0], y[0]))
+        elif k == 1:
+            # One row left: split the columns in half.
+            m[:, : l // 2] = shp_mask(shp, x[: l // 2], y, m[:, : l // 2])
+            m[:, l // 2 :] = shp_mask(shp, x[l // 2 :], y, m[:, l // 2 :])
+        elif l == 1:
+            # One column left: split the rows in half.
+            m[: k // 2] = shp_mask(shp, x, y[: k // 2], m[: k // 2])
+            m[k // 2 :] = shp_mask(shp, x, y[k // 2 :], m[k // 2 :])
+        else:
+            # General case: recurse into the four quadrants.
+            m[: k // 2, : l // 2] = shp_mask(shp, x[: l // 2], y[: k // 2], m[: k // 2, : l // 2])
+            m[: k // 2, l // 2 :] = shp_mask(shp, x[l // 2 :], y[: k // 2], m[: k // 2, l // 2 :])
+            m[k // 2 :, : l // 2] = shp_mask(shp, x[: l // 2], y[k // 2 :], m[k // 2 :, : l // 2])
+            m[k // 2 :, l // 2 :] = shp_mask(shp, x[l // 2 :], y[k // 2 :], m[k // 2 :, l // 2 :])
+    return m

data_preprocess/cubicasa5k/run.sh ADDED Viewed

	@@ -0,0 +1,15 @@

+# Step 1: create a COCO-style dataset for CubiCasa5k
+python create_coco_cc5k.py --data_root=data/cubicasa5k/ \
+    --output=data/coco_cubicasa5k_nowalls_v4/ \
+    --disable_wd2line
+# Step 2: split examples that contain more than one floorplan into separate samples
+python floorplan_extraction.py \
+    --data_root data/coco_cubicasa5k_nowalls_v4/ \
+    --output data/coco_cubicasa5k_nowalls_v4-1_refined/
+# Step 3: merge the individual JSONs into a single JSON file per split (train/val/test).
+# This must be done after floorplan_extraction.py
+python combine_json.py \
+    --input data/coco_cubicasa5k_nowalls_v4-1_refined/ \
+    --output data/coco_cubicasa5k_nowalls_v4-1_refined/annotations/

data_preprocess/cubicasa5k/svg_utils.py ADDED Viewed

	@@ -0,0 +1,746 @@

+import math
+from logging import warning
+import numpy as np
+from skimage.draw import polygon
+from svgpathtools import parse_path
+def get_room_number(e, rooms):
+    name_list = e.getAttribute("class").split(" ")
+    room_type = name_list[1]
+    try:
+        return rooms[room_type]
+    except KeyError:
+        warning("Room type " + e.getAttribute("class") + " not defined.")
+        return rooms["Undefined"]
+def get_icon_number(e, icons):
+    name_list = e.getAttribute("class").split(" ")
+    icon_type = name_list[1]
+    try:
+        return icons[icon_type]
+    except KeyError:
+        warning("Icon type " + e.getAttribute("class") + " not defined.")
+        return icons["Misc"]
+def get_icon(ee):
+    parent_transform = None
+    if ee.parentNode.getAttribute("class") == "FixedFurnitureSet":
+        parent_transform = ee.parentNode.getAttribute("transform")
+        strings = parent_transform.split(",")
+        a_p = float(strings[0][7:])
+        b_p = float(strings[1])
+        c_p = float(strings[2])
+        d_p = float(strings[3])
+        e_p = float(strings[-2])
+        f_p = float(strings[-1][:-1])
+        M_p = np.array([[a_p, c_p, e_p], [b_p, d_p, f_p], [0, 0, 1]])
+    transform = ee.getAttribute("transform")
+    strings = transform.split(",")
+    a = float(strings[0][7:])
+    b = float(strings[1])
+    c = float(strings[2])
+    d = float(strings[3])
+    e = float(strings[-2])
+    f = float(strings[-1][:-1])
+    M = np.array([[a, c, e], [b, d, f], [0, 0, 1]])
+    X = np.array([])
+    Y = np.array([])
+    try:
+        toilet = next(p for p in ee.childNodes if p.nodeName == "g" and p.getAttribute("class") == "BoundaryPolygon")
+        for p in toilet.childNodes:
+            if p.nodeName == "polygon":
+                X, Y = get_icon_polygon(p)
+                break
+        else:
+            x_all, y_all = get_corners(toilet)
+            points = np.column_stack((x_all, y_all))
+            X, Y = get_max_corners(points)
+            # if p.nodeName == "path":
+            # X, Y = get_icon_path(p)
+    except StopIteration:
+        X, Y = make_boudary_polygon(ee)
+    if len(X) < 4:
+        return None, None, X, Y
+    if parent_transform is not None:
+        for i in range(len(X)):
+            v = np.matrix([[X[i]], [Y[i]], [1]])
+            vv = np.matmul(M, v)
+            new_x, new_y, _ = np.round(np.matmul(M_p, vv))
+            X[i] = new_x
+            Y[i] = new_y
+    else:
+        for i in range(len(X)):
+            v = np.matrix([[X[i]], [Y[i]], [1]])
+            vv = np.matmul(M, v)
+            new_x, new_y, _ = np.round(vv)
+            X[i] = new_x
+            Y[i] = new_y
+    rr, cc = polygon(Y, X)
+    return rr, cc, X, Y
+def get_corners(g):
+    x_all, y_all = [], []
+    for pol in g.childNodes:
+        if pol.nodeName == "polygon":
+            x, y = get_icon_polygon(pol)
+            x_all = np.append(x_all, x)
+            y_all = np.append(y_all, y)
+        elif pol.nodeName == "path":
+            x, y = get_icon_path(pol)
+            x_all = np.append(x_all, x)
+            y_all = np.append(y_all, y)
+        elif pol.nodeName == "rect":
+            x = pol.getAttribute("x")
+            if x == "":
+                x = 1.0
+            else:
+                x = float(x)
+            y = pol.getAttribute("y")
+            if y == "":
+                y = 1.0
+            else:
+                y = float(y)
+            x_all = np.append(x_all, x)
+            y_all = np.append(y_all, y)
+            w = float(pol.getAttribute("width"))
+            h = float(pol.getAttribute("height"))
+            x_all = np.append(x_all, x + w)
+            y_all = np.append(y_all, y + h)
+    return x_all, y_all
+def get_max_corners(points):
+    if len(points) == 0:
+        return [], []
+    minx, miny = float("inf"), float("inf")
+    maxx, maxy = float("-inf"), float("-inf")
+    for x, y in points:
+        # Set min coords
+        if x < minx:
+            minx = x
+        if y < miny:
+            miny = y
+        # Set max coords
+        if x > maxx:
+            maxx = x
+        elif y > maxy:
+            maxy = y
+    X = np.array([minx, maxx, maxx, minx])
+    Y = np.array([miny, miny, maxy, maxy])
+    return X, Y
+def make_boudary_polygon(pol):
+    g_gen = (c for c in pol.childNodes if c.nodeName == "g")
+    x_all, y_all = [], []
+    for g in g_gen:
+        x, y = get_corners(g)
+        x_all = np.append(x_all, x)
+        y_all = np.append(y_all, y)
+    points = np.column_stack((x_all, y_all))
+    X, Y = get_max_corners(points)
+    return X, Y
+def get_icon_path(pol):
+    path = pol.getAttribute("d")
+    try:
+        path_alt = parse_path(path)
+        minx, maxx, miny, maxy = path_alt.bbox()
+    except ValueError as e:
+        print("Error handled")
+        print(e)
+        return np.array([]), np.array([])
+    X = np.array([minx, maxx, maxx, minx])
+    Y = np.array([miny, miny, maxy, maxy])
+    if np.unique(X).size == 1 or np.unique(Y).size == 1:
+        return np.array([]), np.array([])
+    return X, Y
+def get_icon_polygon(pol):
+    points = pol.getAttribute("points").split(" ")
+    return get_XY(points)
+def get_XY(points):
+    if points[-1] == "":
+        points = points[:-1]
+    if points[0] == "":
+        points = points[1:]
+    X, Y = np.array([]), np.array([])
+    i = 0
+    for a in points:
+        if "," in a:
+            if len(a) == 2:
+                x, y = a.split(",")
+            else:
+                num_list = a.split(",")
+                x, y = num_list[0], num_list[1]
+            X = np.append(X, np.round(float(x)))
+            Y = np.append(Y, np.round(float(y)))
+        else:
+            # if no comma every other is x and every other is y
+            if i % 2:
+                Y = np.append(Y, float(a))
+            else:
+                X = np.append(X, float(a))
+        i += 1
+    return X, Y
+def get_points(e):
+    pol = next(p for p in e.childNodes if p.nodeName == "polygon")
+    points = pol.getAttribute("points").split(" ")
+    points = points[:-1]
+    X, Y = np.array([]), np.array([])
+    for a in points:
+        x, y = a.split(",")
+        X = np.append(X, np.round(float(x)))
+        Y = np.append(Y, np.round(float(y)))
+    return X, Y
+def get_direction(X, Y):
+    max_diff_X = abs(max(X) - min(X))
+    max_diff_Y = abs(max(Y) - min(Y))
+    if max_diff_X > max_diff_Y:
+        return "H"  # horizontal
+    else:
+        return "V"  # vertical
+def get_polygon(e):
+    pol = next(p for p in e.childNodes if p.nodeName == "polygon")
+    points = pol.getAttribute("points").split(" ")
+    points = points[:-1]
+    X, Y = np.array([]), np.array([])
+    for a in points:
+        y, x = a.split(",")
+        X = np.append(X, np.round(float(x)))
+        Y = np.append(Y, np.round(float(y)))
+    rr, cc = polygon(X, Y)
+    return rr, cc
+def calc_distance(point_1, point_2):
+    return math.sqrt(math.pow(point_1[0] - point_2[0], 2) + math.pow(point_1[1] - point_2[1], 2))
+def calc_center(points):
+    return list(np.mean(np.array(points), axis=0))
+def get_gaussian2D(ndim, sigma=0.25):
+    over_sigmau = 1.0 / (sigma * ndim)
+    over_sigmav = 1.0 / (sigma * ndim)
+    dst_data = np.zeros((ndim, ndim))
+    mean_u = 0.5 * ndim + 0.5
+    mean_v = 0.5 * ndim + 0.5
+    for v in range(ndim):
+        for u in range(ndim):
+            du = (u + 1 - mean_u) * over_sigmau
+            dv = (v + 1 - mean_v) * over_sigmav
+            value = np.exp(-0.5 * (du * du + dv * dv))
+            dst_data[v][u] = value
+    return dst_data
+def draw_junction(index, point, width, height, axes):
+    lineLength = 15
+    lineWidth = 7
+    x, y = point[0]
+    axes.text(x, y, str(index), fontsize=15, color="k")
+    ###########################
+    # o
+    # | #6488ea soft blue
+    # | drawcode = [1,1]
+    #
+    ###########################
+    if point[2][1] == 1 and point[2][2] == 1:
+        axes.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#6488ea")
+    ###########################
+    #
+    #  ---o #6241c7 bluey purple
+    #     drawcode = [1,2]
+    #
+    ###########################
+    elif point[2][1] == 1 and point[2][2] == 2:
+        axes.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#6241c7")
+    ###########################
+    #    |
+    #    | drawcode = [1,3]
+    #    o #056eee cerulean blue
+    #
+    ###########################
+    elif point[2][1] == 1 and point[2][2] == 3:
+        axes.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#056eee")
+    ###########################
+    #
+    #  drawcode = [1,4]
+    #
+    #  o--- #004577 prussian blue
+    #
+    ###########################
+    elif point[2][1] == 1 and point[2][2] == 4:
+        axes.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#004577")
+    ###########################
+    #
+    # |--- drawcode = [2,3]
+    # |
+    #
+    ###########################
+    elif point[2][1] == 2 and point[2][2] == 3:
+        axes.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#04d8b2")
+        axes.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#04d8b2")
+    ###########################
+    #
+    #  ---|
+    #     | drawcode = [2,4]
+    #
+    ###########################
+    elif point[2][1] == 2 and point[2][2] == 4:
+        axes.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#cdfd02")
+        axes.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="#cdfd02")
+    ###########################
+    #    |
+    # ---| drawcode = [2,1]
+    #
+    #
+    ###########################
+    elif point[2][1] == 2 and point[2][2] == 1:
+        axes.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="#ff81c0")
+        axes.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#ff81c0")
+    ###########################
+    #
+    #  |
+    #  | drawcode = [2,2]
+    #  --
+    #
+    ###########################
+    elif point[2][1] == 2 and point[2][2] == 2:
+        axes.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="#f97306")
+        axes.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="#f97306")
+    ###########################
+    #
+    # |
+    # |--- drawcode = [3,4]
+    # |
+    #
+    ###########################
+    elif point[2][1] == 3 and point[2][2] == 4:
+        axes.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="b")
+        axes.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="b")
+        axes.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="b")
+    ###########################
+    #
+    # ---
+    #  |  drawcode = [3,1]
+    #  |
+    #
+    ###########################
+    elif point[2][1] == 3 and point[2][2] == 1:
+        axes.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="y")
+        axes.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="y")
+        axes.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="y")
+    ###########################
+    #
+    #    |
+    # ---| drawcode = [3,2]
+    #    |
+    #
+    ###########################
+    elif point[2][1] == 3 and point[2][2] == 2:
+        axes.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="r")
+        axes.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="r")
+        axes.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="r")
+    ###########################
+    #
+    #  |
+    #  | drawcode = [3,3]
+    # ---
+    #
+    ###########################
+    elif point[2][1] == 3 and point[2][2] == 3:
+        axes.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="m")
+        axes.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="m")
+        axes.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="m")
+    ###########################
+    #
+    #  |
+    # --- drawcode = [4,1]
+    #  |
+    #
+    ###########################
+    elif point[2][1] == 4 and point[2][2] == 1:
+        axes.plot([x, min(x + lineLength, width - 1)], [y, y], linewidth=lineWidth, color="k")
+        axes.plot([x, max(x - lineLength, 0)], [y, y], linewidth=lineWidth, color="k")
+        axes.plot([x, x], [y, max(y - lineLength, 0)], linewidth=lineWidth, color="k")
+        axes.plot([x, x], [y, min(y + lineLength, height - 1)], linewidth=lineWidth, color="k")
+class Wall:
+    def __init__(self, id, end_points, direction, width, name):
+        self.id = id
+        self.name = name
+        self.end_points = end_points
+        self.direction = direction
+        self.max_width = width
+        self.min_width = width
+    def change_end_points(self):
+        if self.direction == "V":
+            self.end_points[0][0] = np.mean(np.array(self.min_coord))
+            self.end_points[1][0] = self.end_points[0][0]
+        elif self.direction == "H":
+            self.end_points[0][1] = np.mean(np.array(self.min_coord))
+            self.end_points[1][1] = self.end_points[0][1]
+    def get_length(self, end_points):
+        return calc_distance(end_points[0], end_points[1])
+class LineWall(Wall):
+    def __init__(self, id, end_points, direction, width, name):
+        Wall.__init__(self, id, end_points, direction, width, name)
+class PolygonWall(Wall):
+    def __init__(self, e, id, shape=None):
+        self.id = id
+        self.name = e.getAttribute("id")
+        self.X, self.Y = self.get_points(e)
+        if abs(max(self.X) - min(self.X)) < 4 or abs(max(self.Y) - min(self.Y)) < 4:
+            # wall is too small and we ignore it.
+            raise ValueError("small wall")
+        if shape:
+            self.X = np.clip(self.X, 0, shape[1])
+            self.Y = np.clip(self.Y, 0, shape[0])
+        # self.X, self.Y = self.sort_X_Y(self.X, self.Y)
+        self.rr, self.cc = polygon(self.Y, self.X)
+        direction = self.get_direction(self.X, self.Y)
+        end_points = self.get_end_points(self.X, self.Y, direction)
+        self.min_width = self.get_width(self.X, self.Y, direction)
+        self.max_width = self.min_width
+        Wall.__init__(self, id, end_points, direction, self.max_width, self.name)
+        self.length = self.get_length(self.end_points)
+        self.center = self.get_center(self.X, self.Y)
+        self.min_coord, self.max_coord = self.get_width_coods(self.X, self.Y)
+    def get_points(self, e):
+        pol = next(p for p in e.childNodes if p.nodeName == "polygon")
+        points = pol.getAttribute("points").split(" ")
+        points = points[:-1]
+        X, Y = np.array([]), np.array([])
+        for a in points:
+            x, y = a.split(",")
+            X = np.append(X, np.round(float(x)))
+            Y = np.append(Y, np.round(float(y)))
+        return X, Y
+    def get_direction(self, X, Y):
+        max_diff_X = abs(max(X) - min(X))
+        max_diff_Y = abs(max(Y) - min(Y))
+        if max_diff_X > max_diff_Y:
+            return "H"  # horizontal
+        else:
+            return "V"  # vertical
+    def get_center(self, X, Y):
+        return np.mean(X), np.mean(Y)
+    def get_width(self, X, Y, direction):
+        _, _, p1, p2 = self._get_min_points(X, Y)
+        if direction == "H":
+            return (abs(p1[0][1] - p1[1][1]) + abs(p2[0][1] - p2[1][1])) / 2
+        elif "V":
+            return (abs(p1[0][0] - p1[1][0]) + abs(p2[0][0] - p2[1][0])) / 2
+    def _width(self, values):
+        temp = values.tolist() if type(values) is not list else values
+        mean_1 = min(temp)
+        mean_2 = max(temp)
+        return abs(mean_1 - mean_2)
+    def merge_possible(self, merged):
+        max_dist = max([self.max_width, merged.max_width])
+        if self.id == merged.id:
+            return False
+        # walls have to be in the same direction
+        if self.direction != merged.direction:
+            return False
+        # walls have too big width difference
+        if abs(self.max_width - merged.max_width) > merged.max_width:
+            return False
+        # If endpoints are near
+        # self up and left endpoint to merged down and right end point
+        dist1 = calc_distance(self.end_points[0], merged.end_points[1])
+        # self down and right endpoint to merged up and left end point
+        dist2 = calc_distance(self.end_points[1], merged.end_points[0])
+        if dist1 <= max_dist * 1.5 or dist2 <= max_dist * 1.5:
+            return True
+        else:
+            return False
+    def _get_overlap(self, a, b):
+        return max(0, min(a[1], b[1]) - max(a[0], b[0]))
+    def merge_walls(self, merged):
+        max_dist = max([self.max_width, merged.max_width])
+        if self.id == merged.id:
+            return None
+        # walls have to be in the same direction
+        if self.direction != merged.direction:
+            return None
+        # If endpoints are near
+        # self up and left endpoint to merged down and right end point
+        dist1 = calc_distance(self.end_points[0], merged.end_points[1])
+        # self down and right endpoint to merged up and left end point
+        dist2 = calc_distance(self.end_points[1], merged.end_points[0])
+        if dist1 <= max_dist * 1.5:
+            if self._get_overlap(self.min_coord, merged.min_coord) <= 0:
+                return None
+            # merged is on top or on left
+            return self.do_merge(merged, 0)
+        elif dist2 <= max_dist * 1.5:
+            if self._get_overlap(self.min_coord, merged.min_coord) <= 0:
+                return None
+            # merged is on down or on right
+            return self.do_merge(merged, 1)
+        else:
+            return None
+    def _get_min_points(self, X, Y):
+        assert len(X) == 4 and len(Y) == 4
+        length = len(X)
+        min_dist1 = np.inf
+        min_dist2 = np.inf
+        point1 = None
+        point2 = None
+        corners1 = None
+        corners2 = None
+        for i in range(length):
+            x1, y1 = X[i], Y[i]
+            x2, y2 = X[(i + 1) % 4], Y[(i + 1) % 4]
+            dist = np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
+            if dist < min_dist1:
+                point2 = point1
+                point1 = np.array([(x1 + x2) / 2, (y1 + y2) / 2])
+                min_dist2 = min_dist1
+                min_dist1 = dist
+                corners2 = corners1
+                corners1 = np.array([[x1, y1], [x2, y2]])
+            elif dist <= min_dist2:
+                point2 = np.array([(x1 + x2) / 2, (y1 + y2) / 2])
+                min_dist2 = dist
+                corners2 = np.array([[x1, y1], [x2, y2]])
+        return point1, point2, corners1, corners2
+    def get_end_points(self, X, Y, direction):
+        point1, point2, _, _ = self._get_min_points(X, Y)
+        if point1[0] != point2[0] or point1[1] != point2[1]:
+            if abs(point1[0] - point2[0]) > abs(point1[1] - point2[1]):
+                # horizontal
+                point1[1] = point1[1] + point2[1] / 2.0
+                point2[1] = point1[1]
+                # point1[1] = int(np.round(point1[1]))
+                # point2[1] = int(np.round(point2[1]))
+            else:
+                # vertical
+                point1[0] = point1[0] + point2[0] / 2.0
+                point2[0] = point1[0]
+                # point1[0] = int(np.round(point1[0]))
+                # point2[0] = int(np.round(point2[0]))
+        return self.sort_end_points(direction, point1, point2)
+    def sort_end_points(self, direction, point1, point2):
+        if direction == "V":
+            if point1[1] < point2[1]:
+                return np.array([point1, point2])
+            else:
+                return np.array([point2, point1])
+        else:
+            if point1[0] < point2[0]:
+                return np.array([point1, point2])
+            else:
+                return np.array([point2, point1])
+    def do_merge(self, merged, direction):
+        # update width
+        self.max_width = max([self.max_width, merged.max_width])
+        self.min_width = min([self.min_width, merged.min_width])
+        # update polygon
+        self.X = np.concatenate((self.X, merged.X))
+        self.Y = np.concatenate((self.Y, merged.Y))
+        # update width coordinates
+        self.max_coord = self.get_max_width_coord(merged)
+        self.min_coord = self.get_min_width_coord(merged)
+        if direction == 0:
+            # merged wall is up or left to the original wall
+            self.end_points = np.array([merged.end_points[0], self.end_points[1]])
+        else:
+            # merged wall is down or right to the original wall
+            self.end_points = np.array([self.end_points[0], merged.end_points[1]])
+        self.length = self.get_length(self.end_points)
+        return self
+    def get_max_width_coord(self, merged):
+        width_1 = abs(self.max_coord[0] - self.max_coord[1])
+        width_2 = abs(merged.max_coord[0] - merged.max_coord[1])
+        return self.max_coord if width_1 > width_2 else merged.max_coord
+    def get_min_width_coord(self, merged):
+        width_1 = max(merged.min_coord[0], self.min_coord[0])
+        # width_1 = abs(self.min_coord[0] - self.min_coord[1])
+        width_2 = min(merged.min_coord[1], self.min_coord[1])
+        # width_2 = abs(merged.min_coord[0] - merged.min_coord[1])
+        # return self.min_coord if width_1 < width_2 else merged.min_coord
+        return [width_1, width_2]
+    def get_width_coods(self, X, Y):
+        if self.direction == "H":
+            dist_1 = abs(Y[0] - Y[2])
+            dist_2 = abs(Y[1] - Y[3])
+            if dist_1 < dist_2:
+                return [Y[0], Y[2]], [Y[1], Y[3]]
+            else:
+                return [Y[1], Y[3]], [Y[0], Y[2]]
+        elif self.direction == "V":
+            dist_1 = abs(X[0] - X[3])
+            dist_2 = abs(X[1] - X[2])
+            if dist_1 < dist_2:
+                return [X[0], X[3]], [X[1], X[2]]
+            else:
+                return [X[1], X[2]], [X[0], X[3]]
+    def sort_X_Y(self, X, Y):
+        max_x = max(X)
+        min_x = min(X)
+        max_y = max(Y)
+        min_y = min(Y)
+        res_X, res_Y = [0] * 4, [0] * 4
+        # top left 0, top right 1, bottom left 2, bottom right 3
+        directions = [[min_x, min_y], [max_x, min_y], [min_x, max_y], [max_x, max_y]]
+        length = len(X)
+        for i in range(length):
+            min_dist = 1000000
+            direction_candidate = None
+            for j, direc in enumerate(directions):
+                dist = calc_distance([X[i], Y[i]], direc)
+                if dist < min_dist:
+                    min_dist = dist
+                    direction_candidate = j
+            res_X[direction_candidate] = X[i]
+            res_Y[direction_candidate] = Y[i]
+        return res_X, res_Y
+    def wall_is_pillar(self, avg_wall_width):
+        if self.max_width > avg_wall_width:
+            if self.length < 3 * self.max_width:
+                return True
+        return False
+    def split_pillar_wall(self, ids, avg_wall_width):
+        half = avg_wall_width / 3.0
+        end_points = [[[0, 0], [0, 0]], [[0, 0], [0, 0]], [[0, 0], [0, 0]], [[0, 0], [0, 0]]]
+        self.X[np.argmax(self.X)] = max(self.X) - half
+        self.X[np.argmax(self.X)] = max(self.X) - half
+        self.X[np.argmin(self.X)] = min(self.X) + half
+        self.X[np.argmin(self.X)] = min(self.X) + half
+        self.Y[np.argmax(self.Y)] = max(self.Y) - half
+        self.Y[np.argmax(self.Y)] = max(self.Y) - half
+        self.Y[np.argmin(self.Y)] = min(self.Y) + half
+        self.Y[np.argmin(self.Y)] = min(self.Y) + half
+        for i in range(4):
+            x = self.X[i]
+            y = self.Y[i]
+            end = [x, y]
+            j = i % 2
+            end_points[i][j] = end
+            end_points[(i + 3) % 4][j] = end
+        walls = []
+        for i, e in enumerate(end_points):
+            if abs(e[0][1] - e[1][1]) > abs(e[0][0] - e[1][0]):
+                # vertical wall
+                direction = "V"
+            else:
+                # horizontal wall
+                direction = "H"
+            e = self.sort_end_points(direction, e[0], e[1])
+            wall = LineWall(ids + i, e, direction, avg_wall_width / 2.0, self.name)
+            walls.append(wall)
+        return walls

data_preprocess/raster2graph/combine_json.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import glob
+import json
+import os
+import shutil
+from pathlib import Path
+def combine_json_files(input_pattern, data_path, split_type, output_file, output_image_dir, start_image_id=0):
+    """
+    Combines multiple COCO-style JSON annotation files into a single file.
+    Args:
+        input_pattern: Glob pattern to match the input JSON files (e.g., "annotations/*.json")
+        output_file: Path to the output combined JSON file
+    """
+    os.makedirs(output_image_dir, exist_ok=True)
+    # Initialize combined data structure
+    combined_data = {"images": [], "annotations": [], "categories": []}
+    # Track image and annotation IDs to avoid duplicates
+    annotation_ids_seen = set()
+    next_image_id = start_image_id
+    next_annotation_id = 0
+    skip_file_list = []
+    image_id_mapping = {}
+    # Find all matching JSON files
+    json_files = sorted(glob.glob(input_pattern))
+    print(f"Found {len(json_files)} JSON files to combine")
+    # Process each file
+    for i, json_file in enumerate(json_files):
+        print(f"Processing file {i + 1}/{len(json_files)}: {json_file}")
+        with open(json_file, "r") as f:
+            data = json.load(f)
+        # Store categories from the first file
+        if i == 0 and data.get("categories"):
+            combined_data["categories"] = data["categories"]
+        # empty annos
+        if len(data["annotations"]) == 0:
+            skip_file_list.append(data["images"][0]["id"])
+            continue
+        # Process images
+        for image in data.get("images", []):
+            if image["id"] not in image_id_mapping:
+                image_id_mapping[image["id"]] = next_image_id
+            else:
+                skip_file_list.append(image["id"])
+                continue
+            image["id"] = next_image_id
+            next_image_id += 1
+            # org_file_name = copy(image['file_name'])
+            image["file_name"] = str(image["id"]).zfill(6) + ".png"
+            org_file_name = os.path.basename(json_file).replace(".json", ".png")
+            if image["file_name"] != org_file_name and os.path.exists(f"{data_path}/{split_type}/{org_file_name}"):
+                shutil.copy(f"{data_path}/{split_type}/{org_file_name}", f"{output_image_dir}/{image['file_name']}")
+            combined_data["images"].append(image)
+        # Process annotations
+        for annotation in data.get("annotations", []):
+            annotation["id"] = next_annotation_id
+            next_annotation_id += 1
+            annotation["image_id"] = image_id_mapping[annotation["image_id"]]
+            annotation_ids_seen.add(annotation["id"])
+            combined_data["annotations"].append(annotation)
+    # Write combined data to output file
+    output_path = Path(output_file)
+    output_path.parent.mkdir(exist_ok=True, parents=True)
+    with open(output_file, "w") as f:
+        json.dump(combined_data, f, indent=2)
+    with open(output_path.parent / f"{output_path.name.split('.')[0]}_image_id_mapping.json", "w") as f:
+        json.dump(image_id_mapping, f, indent=2)
+    if len(skip_file_list):
+        with open(output_path.parent / f"{output_path.name.split('.')[0]}_skipped.txt", "w") as f:
+            f.write("\n".join([str(x) for x in skip_file_list]))
+    print(f"Combined data written to {output_file}")
+    print(f"Total images: {len(combined_data['images'])}")
+    print(f"Total annotations: {len(combined_data['annotations'])}")
+    print(f"Total categories: {len(combined_data['categories'])}")
+    print(f"Skipped images: {len(skip_file_list)}")
+    image_id_mapping_list = [[f"{k} {v}"] for k, v in image_id_mapping.items()]  # Reverse mapping for easier lookup
+    return combined_data, image_id_mapping_list
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Combine multiple COCO-style JSON annotation files")
+    parser.add_argument("--input", required=True, help="Glob pattern for input JSON files, e.g., 'annotations/*.json'")
+    parser.add_argument("--output", required=True, help="Output JSON file path")
+    args = parser.parse_args()
+    splits = ["train", "val", "test"]
+    for i, split in enumerate(splits):
+        if split == "train":
+            start_image_id = 0
+        else:
+            start_image_id += len(list(Path(f"{args.input}/{splits[i - 1]}").glob("*.png")))
+        _, image_id_mapping_list = combine_json_files(
+            f"{args.input}/{split}_jsons/*.json",
+            args.input,
+            split,
+            f"{args.output}/annotations/{split}.json",
+            output_image_dir=f"{args.output}/{split}",
+            start_image_id=start_image_id,
+        )

data_preprocess/raster2graph/combine_mapping_ids.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import json
+def generate_combined_mapping(file_mapping_path, image_id_mapping_path, output_path):
+    """
+    Generates a combined mapping file from an original filename mapping
+    and an image ID mapping.
+    Args:
+        file_mapping_path (str): Path to the text file mapping original filenames
+                                 to intermediate 6-digit IDs.
+        image_id_mapping_path (str): Path to the JSON file mapping intermediate
+                                     IDs to destination IDs.
+        output_path (str): Path where the new combined mapping file will be saved.
+    """
+    # 1. Read test_file_mapping.txt
+    org_fn_to_intermediate_id = {}
+    try:
+        with open(file_mapping_path, "r") as f:
+            for line in f:
+                parts = line.strip().split()
+                if len(parts) == 2:
+                    org_fn = parts[0]
+                    # Convert the 6-digit string ID to an integer for lookup
+                    intermediate_id_str = parts[1]
+                    # Remove leading zeros and convert to int
+                    intermediate_id = int(intermediate_id_str)
+                    org_fn_to_intermediate_id[org_fn] = intermediate_id
+    except FileNotFoundError:
+        print(f"Error: The file '{file_mapping_path}' was not found.")
+        return
+    except Exception as e:
+        print(f"Error reading '{file_mapping_path}': {e}")
+        return
+    # 2. Read test_image_id_mapping.json
+    intermediate_id_to_dst_fn = {}
+    try:
+        with open(image_id_mapping_path, "r") as f:
+            image_id_data = json.load(f)
+            for key, value in image_id_data.items():
+                # Keys in JSON are strings, convert to int for consistency
+                intermediate_id_to_dst_fn[int(key)] = value
+    except FileNotFoundError:
+        print(f"Error: The file '{image_id_mapping_path}' was not found.")
+        return
+    except json.JSONDecodeError:
+        print(f"Error: Could not decode JSON from '{image_id_mapping_path}'. Please ensure it's valid JSON.")
+        return
+    except Exception as e:
+        print(f"Error reading '{image_id_mapping_path}': {e}")
+        return
+    # 3. Create the combined mapping and write to output file
+    combined_mappings = []
+    found_mappings_count = 0
+    for org_fn, intermediate_id in org_fn_to_intermediate_id.items():
+        if intermediate_id in intermediate_id_to_dst_fn:
+            dst_fn = intermediate_id_to_dst_fn[intermediate_id]
+            combined_mappings.append(f"{org_fn} {dst_fn}")
+            found_mappings_count += 1
+        else:
+            # Optionally, you can print a warning for IDs not found
+            print(f"Warning: Intermediate ID '{intermediate_id}' for '{org_fn}' not found in image ID mapping.")
+    try:
+        with open(output_path, "w") as f:
+            for mapping_line in combined_mappings:
+                f.write(mapping_line + "\n")
+        print(f"\nSuccessfully generated combined mapping to '{output_path}'.")
+        print(f"Total original filenames processed: {len(org_fn_to_intermediate_id)}")
+        print(f"Total combined mappings written: {found_mappings_count}")
+    except Exception as e:
+        print(f"Error writing to output file '{output_path}': {e}")
+# Define file paths
+file_mapping_path = "data/R2G_hr_dataset_processed/test_file_mapping.txt"
+image_id_mapping_path = "data/R2G_hr_dataset_processed_v1/annotations/test_image_id_mapping.json"
+output_mapping_path = "data/R2G_hr_dataset_processed_v1/annotations/test_combined_mapping.txt"
+# Run the mapping function
+generate_combined_mapping(file_mapping_path, image_id_mapping_path, output_mapping_path)
+# You can optionally print the content of the generated file to verify
+print("\n--- Content of combined_mapping.txt ---")
+try:
+    with open(output_mapping_path, "r") as f:
+        print(f.read())
+except FileNotFoundError:
+    print("Output file was not created.")
+# Clean up dummy files (optional)
+# os.remove(file_mapping_path)
+# os.remove(image_id_mapping_path)

data_preprocess/raster2graph/convert_to_coco.py ADDED Viewed

	@@ -0,0 +1,472 @@

+import gc
+import os
+import sys
+print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import argparse
+import json
+import shutil
+from multiprocessing import Pool
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from datasets.dataset import MyDataset
+from matplotlib.patches import Patch
+from shapely.geometry import Polygon
+from tqdm import tqdm
+from util.data_utils import edge_inside
+from util.graph_utils import get_cycle_basis_and_semantic, tensors_to_graphs_batch
+# Per-channel mean and std; process_floorplan uses them to undo the image normalization
+mean = [0.920, 0.913, 0.891]
+std = [0.214, 0.216, 0.228]
+# Room-class id -> class name. Id 0 ("unknown") is skipped by prepare_dict when it
+# builds the COCO "categories" list.
+ID2CLASS = {
+    0: "unknown",
+    1: "living_room",
+    2: "kitchen",
+    3: "bedroom",
+    4: "bathroom",
+    5: "restroom",
+    6: "balcony",
+    7: "closet",
+    8: "corridor",
+    9: "washing_room",
+    10: "PS",
+    11: "outside",
+    # 12: 'wall'  (disabled; process_floorplan skips instances whose class id is not listed here)
+}
+def plot_room_map(preds, room_map, room_id=0, im_size=256, plot_text=True):
+    """Draw room polygons overlaid on the density map"""
+    centroid_x = int(np.mean(preds[:, 0]))
+    centroid_y = int(np.mean(preds[:, 1]))
+    # Get text size to create a background box
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 0.3
+    thickness = 1
+    text = str(room_id)
+    (text_width, text_height), baseline = cv2.getTextSize(text, font, font_scale, thickness)
+    border_color = (252, 252, 0)
+    for i, corner in enumerate(preds):
+        if i == len(preds) - 1:
+            cv2.line(
+                room_map,
+                (round(corner[0]), round(corner[1])),
+                (round(preds[0][0]), round(preds[0][1])),
+                border_color,
+                2,
+            )
+        else:
+            cv2.line(
+                room_map,
+                (round(corner[0]), round(corner[1])),
+                (round(preds[i + 1][0]), round(preds[i + 1][1])),
+                border_color,
+                2,
+            )
+        cv2.circle(room_map, (round(corner[0]), round(corner[1])), 2, (0, 0, 255), 2)
+        # cv2.putText(room_map, str(i), (round(corner[0]), round(corner[1])), cv2.FONT_HERSHEY_SIMPLEX,
+        #            0.4, (0, 255, 0), 1, cv2.LINE_AA)
+        # Draw white background box with transparency
+        # overlay = room_map.copy()
+        # cv2.addWeighted(overlay, 0.7, room_map, 0.3, 0, room_map)  # 70% opacity
+        # Draw text
+        if plot_text:
+            cv2.rectangle(
+                room_map,
+                (centroid_x - text_width // 2 - 2, centroid_y - text_height // 2 - 2),
+                (centroid_x + text_width // 2 + 2, centroid_y + text_height // 2 + 2),
+                (255, 255, 255),  # (0, 0, 0),
+                -1,
+            )  # Filled rectangle
+            cv2.putText(
+                room_map,
+                text,
+                (centroid_x - text_width // 2, centroid_y + text_height // 2),
+                font,
+                font_scale,
+                (0, 100, 0),
+                thickness,
+            )
+    return room_map
+def plot_density_map(sample, image_size, room_polys, pred_room_label_per_scene, plot_text=True):
+    if not isinstance(sample, np.ndarray):
+        density_map = np.transpose(sample.cpu().numpy(), [1, 2, 0])
+        # # Convert to grayscale if not already
+        # if density_map.shape[2] > 1:
+        #     density_map = cv2.cvtColor(density_map, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
+    else:
+        density_map = sample
+    if density_map.shape[2] == 3:
+        density_map = density_map * (image_size - 1)
+    else:
+        density_map = np.repeat(density_map, 3, axis=2) * (image_size - 1)
+    pred_room_map = np.zeros([image_size, image_size, 3])
+    for room_poly, room_id in zip(room_polys, pred_room_label_per_scene):
+        pred_room_map = plot_room_map(
+            np.array(room_poly), pred_room_map, room_id, im_size=image_size, plot_text=plot_text
+        )
+    alpha = 0.4  # Adjust for desired transparency
+    pred_room_map = cv2.addWeighted(density_map.astype(np.uint8), alpha, pred_room_map.astype(np.uint8), 1 - alpha, 0)
+    return pred_room_map
+def is_clockwise(points):
+    # points is a list of 2d points.
+    assert len(points) > 0
+    s = 0.0
+    for p1, p2 in zip(points, points[1:] + [points[0]]):
+        s += (p2[0] - p1[0]) * (p2[1] + p1[1])
+    return s > 0.0
+def resort_corners(corners):
+    # re-find the starting point and sort corners clockwisely
+    x_y_square_sum = corners[:, 0] ** 2 + corners[:, 1] ** 2
+    start_corner_idx = np.argmin(x_y_square_sum)
+    corners_sorted = np.concatenate([corners[start_corner_idx:], corners[:start_corner_idx]])
+    ## sort points clockwise
+    if not is_clockwise(corners_sorted[:, :2].tolist()):
+        corners_sorted[1:] = np.flip(corners_sorted[1:], 0)
+    return corners
+def create_coco_bounding_box(bb_x, bb_y, image_width, image_height, bound_pad=2):
+    bb_x = np.unique(bb_x)
+    bb_y = np.unique(bb_y)
+    bb_x_min = np.maximum(np.min(bb_x) - bound_pad, 0)
+    bb_y_min = np.maximum(np.min(bb_y) - bound_pad, 0)
+    bb_x_max = np.minimum(np.max(bb_x) + bound_pad, image_width - 1)
+    bb_y_max = np.minimum(np.max(bb_y) + bound_pad, image_height - 1)
+    bb_width = bb_x_max - bb_x_min
+    bb_height = bb_y_max - bb_y_min
+    coco_bb = [bb_x_min, bb_y_min, bb_width, bb_height]
+    return coco_bb
+def prepare_dict():
+    save_dict = {"images": [], "annotations": [], "categories": []}
+    for key, value in ID2CLASS.items():
+        if key == 0:
+            continue
+        type_dict = {"supercategory": "room", "id": key, "name": value}
+        save_dict["categories"].append(type_dict)
+    return save_dict
+def get_args_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset_path",
+        type=str,
+        required=True,
+        help="Path to the dataset directory",
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        required=True,
+        help="Path to the dataset directory",
+    )
+    # Add more arguments as needed
+    return parser
+def visualize_room_polygons(room_polygons, room_classes, image_size=512, save_path="cubicasa_debug.png"):
+    """
+    Visualize the extracted room polygons.
+    Args:
+        room_polygons: Dictionary of room polygons as returned by extract_room_polygons
+        figsize: Figure size for the plot
+    """
+    # Set figure size to exactly 256x256 pixels
+    dpi = 100  # Standard screen DPI
+    figsize = (image_size / dpi, image_size / dpi)  # Convert pixels to inches
+    class_names = [v for k, v in ID2CLASS.items()]
+    # Get unique classes from the mask
+    unique_classes = list(ID2CLASS.keys())
+    # Create a discrete colormap
+    cmap = plt.cm.get_cmap("gist_ncar", 256)  # nipy_spectral
+    norm = np.linspace(0, 1, 13)  # int(max(unique_classes))+1
+    fig = plt.figure(figsize=figsize, dpi=dpi)
+    ax = fig.add_axes([0, 0, 1, 1])
+    ax.set_xlim(0, image_size)
+    ax.set_ylim(0, image_size)
+    ax.set_aspect("equal")
+    ax.axis("off")
+    # Plot each room polygon and fill with color
+    for polygon, room_cls in zip(room_polygons, room_classes):
+        polygon_array = np.array(polygon).copy()
+        polygon_array[:, 1] = image_size - 1 - polygon_array[:, 1]  # flip
+        # Fill the polygon with its class color
+        color = cmap(norm[int(room_cls)])
+        ax.fill(polygon_array[:, 0], polygon_array[:, 1], color=color, alpha=0.4, zorder=1)
+        # Draw the polygon border
+        ax.plot(polygon_array[:, 0], polygon_array[:, 1], "k-", linewidth=2, zorder=2)
+        # Add room ID label at the centroid
+        centroid_x = np.mean(polygon_array[:, 0])
+        centroid_y = np.mean(polygon_array[:, 1])
+        ax.text(
+            centroid_x,
+            centroid_y,
+            str(room_cls),
+            fontsize=12,
+            ha="center",
+            va="center",
+            bbox=dict(facecolor="white", alpha=0.7),
+            zorder=3,
+        )
+    # Create custom legend elements
+    legend_elements = []
+    for i, cls in enumerate(sorted(unique_classes)):
+        color = cmap(norm[int(cls)])
+        cls_name = f"{int(cls)}_{class_names[int(cls)]}"
+        legend_elements.append(Patch(facecolor=color, edgecolor="black", label=f"{cls_name}", alpha=0.6))
+    ax.legend(
+        handles=legend_elements,
+        loc="best",
+        title="Classes",
+        fontsize=10,
+        markerscale=1,
+        title_fontsize=12,
+        framealpha=0.5,
+    )
+    plt.tight_layout(pad=0)
+    fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
+    plt.close()
+def process_floorplan(image_set, split, source_data_path, save_dir, save_aux_dir, vis_fp=False):
+    """Convert one dataset sample into a per-image COCO-style JSON and copy its image.
+    Args:
+        image_set: ``(img, target)`` pair; ``target`` is read below through keys such as
+            "graph", "points", "unnormalized_points", "edges", "size", "file_name" and "image_id".
+        split: Split name used to build the input and output sub-directories.
+        source_data_path: Root directory holding ``<split>/<file_name>.png`` source images.
+        save_dir: Output root for ``<split>/``, ``<split>_jsons/`` and ``<split>_logs/``.
+        save_aux_dir: Directory for debug visualizations (written only when ``vis_fp`` is true).
+        vis_fp: If true, also save a polygon plot and an image overlay for this sample.
+    """
+    img, target = image_set
+    img = img * torch.tensor(std)[:, None, None] + torch.tensor(mean)[:, None, None]  # unnormalize
+    graph = tensors_to_graphs_batch([target["graph"]])
+    del target["graph"]
+    tgt_this_preds = []
+    tgt_this_edges = []
+    # Build one dict per point, plus an edge list linking each point to its graph neighbours
+    for _ in range(len(target["points"])):
+        tgt_p_d = {}
+        tgt_p_d["scores"] = torch.tensor(1.0000, device="cpu")
+        tgt_p_d["points"] = target["unnormalized_points"][_]
+        tgt_p_d["edges"] = target["edges"][_]
+        tgt_p_d["size"] = target["size"]
+        if "semantic_left_up" in target:
+            tgt_p_d["semantic_left_up"] = target["semantic_left_up"][_]
+            tgt_p_d["semantic_right_up"] = target["semantic_right_up"][_]
+            tgt_p_d["semantic_right_down"] = target["semantic_right_down"][_]
+            tgt_p_d["semantic_left_down"] = target["semantic_left_down"][_]
+        tgt_this_preds.append(tgt_p_d)
+        # Each graph entry holds four neighbour slots; (-1, -1) marks an empty slot
+        for __ in range(4):
+            adj = graph[0][tuple(tgt_p_d["points"].tolist())][__]
+            if adj != (-1, -1):
+                tgt_p_d1 = tgt_p_d
+                tgt_p_d2 = {}
+                # 99999 is a sentinel meaning "no matching point found yet"
+                indx = 99999
+                # Match the neighbour to a known point within an L1 distance of 2 pixels
+                for ___, up in enumerate(target["unnormalized_points"].tolist()):
+                    if abs(up[0] - adj[0]) + abs(up[1] - adj[1]) <= 2:
+                        indx = ___
+                        break
+                # assert indx != 99999
+                if indx == 99999:  # No match found
+                    # Log a warning or skip this iteration
+                    print(f"Warning: No match found for adj {adj}")
+                    continue  # Skip to the next iteration
+                # tgt_p_d2['scores'] = torch.tensor(1.0000, device='cuda:0')
+                tgt_p_d2["points"] = target["unnormalized_points"][indx]
+                tgt_p_d2["edges"] = target["edges"][indx]
+                tgt_p_d2["size"] = target["size"]
+                if "semantic_left_up" in target:
+                    tgt_p_d2["semantic_left_up"] = target["semantic_left_up"][indx]
+                    tgt_p_d2["semantic_right_up"] = target["semantic_right_up"][indx]
+                    tgt_p_d2["semantic_right_down"] = target["semantic_right_down"][indx]
+                    tgt_p_d2["semantic_left_down"] = target["semantic_left_down"][indx]
+                tgt_e_l = (tgt_p_d1, tgt_p_d2)
+                # Presumably edge_inside reports whether the reversed edge is already stored,
+                # so each undirected edge is kept once — confirm against util.data_utils
+                if not edge_inside((tgt_p_d2, tgt_p_d1), tgt_this_edges):
+                    tgt_this_edges.append(tgt_e_l)
+    tgt = [(tgt_this_preds, [], tgt_this_edges)]
+    # target_simple_cycles and target_results are consumed below as room polygons and
+    # their class ids, paired by position
+    target_d_rev, target_simple_cycles, target_results = get_cycle_basis_and_semantic((2, 999999, tgt))
+    # convert to coco format
+    polys_list = []
+    polys_semantic_list = []
+    output_json = []
+    # NOTE(review): element 0 of target["size"] is treated as the width here — confirm this
+    # matches the order in which the dataset stores it
+    image_width, image_height = target["size"][0].item(), target["size"][1].item()
+    # NOTE(review): keeps only the text before the first "." of the file name
+    filename = target["file_name"].split(".")[0]
+    img_id = int(target["image_id"])
+    img_dict = {}
+    img_dict["file_name"] = str(img_id).zfill(6) + ".png"
+    img_dict["id"] = img_id
+    img_dict["width"] = image_width
+    img_dict["height"] = image_height
+    save_dict = prepare_dict()
+    os.makedirs(os.path.join(save_dir, split), exist_ok=True)
+    os.makedirs(f"{save_dir}/{split}_jsons/", exist_ok=True)
+    json_path = f"{save_dir}/{split}_jsons/{str(img_id).zfill(6)}.json"
+    for instance_id, (poly, poly_cls) in enumerate(zip(target_simple_cycles, target_results)):
+        t = [(int(pt[0]), int(pt[1])) for pt in poly]
+        class_id = int(poly_cls)
+        # Every polygon is kept for visualization, including ones filtered out of the JSON below
+        polys_list.append(t)
+        polys_semantic_list.append(class_id)
+        poly_shapely = Polygon(t)
+        area = poly_shapely.area
+        coco_seg_poly = []
+        polygon = np.array(t)
+        poly_sorted = resort_corners(polygon)
+        # Flatten the corners into the [x0, y0, x1, y1, ...] list stored as the segmentation
+        for p in poly_sorted:
+            coco_seg_poly += list(p)
+        # Drop polygons with an area below 100
+        if area < 100:
+            continue
+        if class_id not in ID2CLASS:
+            print(f"Warning: Class ID {class_id} not found in ID2CLASS mapping. Skipping instance.")
+            continue
+        # Slightly wider bounding box
+        rectangle_shapely = poly_shapely.envelope
+        bb_x, bb_y = rectangle_shapely.exterior.xy
+        coco_bb = create_coco_bounding_box(bb_x, bb_y, image_width, image_height, bound_pad=2)
+        # instance_id comes from enumerate, so ids of skipped polygons are not reused
+        output_json.append(
+            {
+                "image_id": img_id,
+                "segmentation": [coco_seg_poly],
+                "category_id": class_id,
+                "id": instance_id,
+                "area": area,
+                "bbox": coco_bb,
+                "iscrowd": 0,
+            }
+        )
+    if vis_fp:
+        visualize_room_polygons(
+            polys_list,
+            polys_semantic_list,
+            image_size=image_width,
+            save_path=os.path.join(save_aux_dir, str(img_id).zfill(6) + ".png"),
+        )
+        room_map = plot_density_map(
+            img,
+            image_width,
+            polys_list,
+            polys_semantic_list,
+            plot_text=False,
+        )
+        cv2.imwrite(os.path.join(save_aux_dir, str(img_id).zfill(6) + "_density_map.png"), room_map)
+    print(f"Processed image {img_id} with {len(output_json)} instances.")
+    # print(f"Class: {target_results}")
+    # min_class_id = min(target_results)
+    # max_class_id = max(target_results)
+    # if max_class_id == 12:
+    #     breakpoint()
+    # print(f"Min class ID: {min_class_id}, Max class ID: {max_class_id}")
+    save_dict["images"].append(img_dict)
+    save_dict["annotations"] += output_json
+    with open(json_path, "w") as json_file:
+        # Convert all numpy and torch types to native Python types for JSON serialization
+        def convert(o):
+            if isinstance(o, (np.integer, np.int32, np.int64)):
+                return int(o)
+            if isinstance(o, (np.floating, np.float32, np.float64)):
+                return float(o)
+            if isinstance(o, (np.ndarray,)):
+                return o.tolist()
+            if isinstance(o, torch.Tensor):
+                return o.item() if o.numel() == 1 else o.tolist()
+            # Anything else is written as its string form rather than raising
+            return str(o)
+        json.dump(save_dict, json_file, default=convert)
+    # Copy the source image under its new zero-padded id name
+    shutil.copy(
+        os.path.join(source_data_path, split, filename + ".png"),
+        os.path.join(save_dir, split, str(img_id).zfill(6) + ".png"),
+    )
+    # Write mapping from source file name to target file name (safe for parallel)
+    mapping_line = f"{filename} {str(img_id).zfill(6)}\n"
+    # Each process appends to its own file, named after its pid
+    pid = os.getpid()
+    os.makedirs(os.path.join(save_dir, f"{split}_logs"), exist_ok=True)
+    mapping_file = os.path.join(save_dir, f"{split}_logs", f"{split}_file_mapping_{pid}.txt")
+    with open(mapping_file, "a") as f:
+        f.write(mapping_line)
+if __name__ == "__main__":
+    # Entry point: convert every split of the dataset with a pool of worker processes.
+    args = get_args_parser().parse_args()
+    torch.set_printoptions(threshold=np.inf, linewidth=999999)
+    np.set_printoptions(threshold=np.inf, linewidth=999999)
+    gc.collect()
+    torch.cuda.empty_cache()
+    # NOTE(review): wrapper is defined under the __main__ guard and reads the module-level
+    # names dataset, split, save_aux_dir and args; this presumably relies on a fork-based
+    # Pool start method — confirm on platforms that spawn workers.
+    def wrapper(scene_id):
+        # Load one sample; a failure to load is reported and the sample is skipped
+        try:
+            image_set = dataset[scene_id]
+        except Exception as e:
+            print(f"Error processing scene {scene_id}: {e}. Skipping...")
+            return
+        # Visualizations are written only for indices below 100
+        process_floorplan(image_set, split, args.dataset_path, args.output_dir, save_aux_dir, vis_fp=scene_id < 100)
+    def worker_init(dataset_obj):
+        # Store dataset as global to avoid pickling issues
+        global dataset
+        dataset = dataset_obj
+    splits = ["train", "val", "test"]
+    for split in splits:
+        # Images are read from <dataset_path>/<split>, annotations from
+        # <dataset_path>/annot_json/instances_<split>.json
+        dataset = MyDataset(
+            args.dataset_path + f"/{split}",
+            args.dataset_path + "/annot_json" + f"/instances_{split}.json",
+            extract_roi=False,
+        )
+        save_aux_dir = os.path.join(args.output_dir, f"{split}_aux")
+        os.makedirs(save_aux_dir, exist_ok=True)
+        # Sequential alternative, kept for reference:
+        # for i, image_set in enumerate(tqdm(dataset)):
+        #     save_aux_dir = os.path.join(args.output_dir, f"{split}_aux")
+        #     os.makedirs(save_aux_dir, exist_ok=True)
+        #     process_floorplan(image_set, split, args.dataset_path, args.output_dir, save_aux_dir, vis_fp=i < 100)
+        num_processes = 16
+        with Pool(num_processes, initializer=worker_init, initargs=(dataset,)) as p:
+            indices = range(len(dataset))
+            # list() drains the lazy imap iterator so every index is processed; tqdm shows progress
+            list(tqdm(p.imap(wrapper, indices), total=len(dataset)))

data_preprocess/raster2graph/dataset.py ADDED Viewed

	@@ -0,0 +1,296 @@

+import copy
+import json
+import os
+from collections import defaultdict
+import numpy as np
+import torch
+import torch.multiprocessing
+import torch.utils.data
+import torchvision.transforms.functional as F
+from PIL import Image
+from torch.utils.data import Dataset
+from util.data_utils import l1_dist
+from util.graph_utils import graph_to_tensor
+from util.image_id_dict import d
+from util.mean_std import mean, std
+from util.semantics_dict import semantics_dict
+torch.multiprocessing.set_sharing_strategy("file_system")
+class MyDataset(Dataset):
+    def __init__(self, img_path, annot_path, extract_roi, image_size=512):
+        self.img_path = img_path
+        self.quadtree_path = "/".join(img_path.split("/")[:-1]) + "/annot_npy"
+        self.mode = img_path.split("/")[-1]
+        self.image_size = image_size
+        # load annotation
+        with open(annot_path, "r") as f:
+            dataset = json.load(f)
+        # images
+        self.imgs = {}
+        for img in dataset["images"]:
+            self.imgs[img["id"]] = img
+        self.imgToAnns = defaultdict(list)
+        for ann in dataset["annotations"]:
+            self.imgToAnns[ann["image_id"]].append(ann)
+        self.ids = list(sorted(self.imgs.keys()))
+        if "0c-10-c468a57377ff8ef63d3b26a6d1fa-0002" in self.ids:
+            self.ids.remove("0c-10-c468a57377ff8ef63d3b26a6d1fa-0002")
+        if "0c-10-8486f08035ba152d5244ac54099c-0001" in self.ids:
+            self.ids.remove("0c-10-8486f08035ba152d5244ac54099c-0001")
+    def __getitem__(self, index):
+        img_id = self.ids[index]
+        img_file_name = self.imgs[img_id]["file_name"].replace(".jpg", ".png")
+        img = Image.open(os.path.join(self.img_path, img_file_name)).convert("RGB")
+        image_scale = self.image_size / img.size[0]
+        if img.size[0] != self.image_size or img.size[1] != self.image_size:
+            img = img.resize((self.image_size, self.image_size), Image.BILINEAR)
+        if 1:
+            # get structure annotations
+            anns = self.imgToAnns[img_id]
+            new_anns = []
+            for ann in anns:
+                new_ann = copy.deepcopy(ann)
+                new_ann["point"] = [int(ann["point"][0] * image_scale), int(ann["point"][1] * image_scale)]
+                new_anns.append(new_ann)
+            target = {"image_id": img_id, "annotations": new_anns}
+            orig_quadtree = np.load(
+                os.path.join(self.quadtree_path, img_file_name[:-4] + ".npy"), allow_pickle=True
+            ).item()["quatree"][0]
+            quadtree = {}
+            for k, v in orig_quadtree.items():
+                new_k = k
+                new_v = []
+                for pos in v:
+                    new_pos = (int(pos[0] * image_scale), int(pos[1] * image_scale))
+                    new_v.append(new_pos)
+                quadtree[new_k] = new_v
+            orig_graph = np.load(
+                os.path.join(self.quadtree_path, img_file_name[:-4] + ".npy"), allow_pickle=True
+            ).item()
+            del orig_graph["quatree"]
+            new_graph = {}
+            for k, v in orig_graph.items():
+                new_k = (int(k[0] * image_scale), int(k[1] * image_scale))
+                new_v = []
+                for adj in v:
+                    if adj == (-1, -1):
+                        new_v.append((-1, -1))
+                    else:
+                        new_v.append((int(adj[0] * image_scale), int(adj[1] * image_scale)))
+                new_graph[new_k] = new_v
+            target_layers = []
+            for layer, layer_points in quadtree.items():
+                target_layer = []
+                for layer_point in layer_points:
+                    for target_i in target["annotations"]:
+                        if l1_dist(target_i["point"], list(layer_point)) <= 2:
+                            target_layer.append(target_i)
+                            break
+                target_layers.extend(target_layer)
+            layer_indices = []
+            count = 0
+            for k, v in quadtree.items():
+                if k == 0:
+                    layer_indices.append(0)
+                else:
+                    layer_indices.append(count)
+                count += len(v)
+            image_id = torch.tensor([d[img_id]])
+            points = [obj["point"] for obj in target_layers]
+            points = torch.as_tensor(points, dtype=torch.int64).reshape(-1, 2)
+            edges = [obj["edge_code"] for obj in target_layers]
+            edges = torch.tensor(edges, dtype=torch.int64)
+            # get semantic annotations
+            semantic_left_up = [semantics_dict[obj["semantic"][0]] for obj in target_layers]
+            semantic_right_up = [semantics_dict[obj["semantic"][1]] for obj in target_layers]
+            semantic_right_down = [semantics_dict[obj["semantic"][2]] for obj in target_layers]
+            semantic_left_down = [semantics_dict[obj["semantic"][3]] for obj in target_layers]
+            semantic_left_up = torch.tensor(semantic_left_up, dtype=torch.int64)
+            semantic_right_up = torch.tensor(semantic_right_up, dtype=torch.int64)
+            semantic_right_down = torch.tensor(semantic_right_down, dtype=torch.int64)
+            semantic_left_down = torch.tensor(semantic_left_down, dtype=torch.int64)
+            # annotations
+            target = {}
+            target["edges"] = edges
+            target["file_name"] = img_file_name
+            target["image_id"] = image_id
+            target["size"] = torch.as_tensor([img.size[1], img.size[0]])
+            target["semantic_left_up"] = semantic_left_up
+            target["semantic_right_up"] = semantic_right_up
+            target["semantic_right_down"] = semantic_right_down
+            target["semantic_left_down"] = semantic_left_down
+            # get image
+            img = F.to_tensor(img)
+            img = F.normalize(img, mean=mean, std=std)
+            target["unnormalized_points"] = points
+            # normalize
+            points = points / torch.tensor([img.shape[2], img.shape[1]], dtype=torch.float32)
+            target["points"] = points
+            target["layer_indices"] = torch.tensor(layer_indices)
+            target["graph"] = graph_to_tensor(new_graph)
+            return img, target
+    def __len__(self):
+        return len(self.ids)
+class MyDataset2(Dataset):
+    def __init__(self, img_path, annot_path, extract_roi, disable_sem_info=False):
+        self.disable_sem_info = disable_sem_info
+        self.img_path = img_path
+        self.quadtree_path = "/".join(img_path.split("/")[:-1]) + "/annotations_npy/" + img_path.split("/")[-1]
+        self.edgecode_path = "/".join(img_path.split("/")[:-1]) + "/annotations_edge/" + img_path.split("/")[-1]
+        self.mode = img_path.split("/")[-1]
+        available_ids = {int(x.replace(".npy", "")) for x in os.listdir(self.quadtree_path)}
+        # load annotation
+        with open(annot_path, "r") as f:
+            dataset = json.load(f)
+        # images
+        self.imgs = {}
+        for img in dataset["images"]:
+            if img["id"] not in available_ids:
+                continue
+            self.imgs[img["id"]] = img
+        self.imgToAnns = defaultdict(list)
+        for ann in dataset["annotations"]:
+            if ann["image_id"] not in available_ids:
+                continue
+            self.imgToAnns[ann["image_id"]].append(ann)
+        self.ids = list(sorted(self.imgs.keys()))
+    def __getitem__(self, index):
+        """Load image ``index`` and build its target dict from the per-image .npy and edge-code files.
+        Returns:
+            ``(img, target)`` where ``img`` is the normalized image tensor and ``target`` holds
+            "edges", "image_id", "file_name", "size", "unnormalized_points", "points",
+            "layer_indices", "graph" and, unless ``disable_sem_info`` is set, four "semantic_*" tensors.
+        """
+        img_id = self.ids[index]
+        img_file_name = self.imgs[int(img_id)]["file_name"]
+        img = Image.open(os.path.join(self.img_path, img_file_name)).convert("RGB")
+        if 1:
+            # get structure annotations
+            # anns = self.imgToAnns[int(img_id)]
+            # NOTE: allow_pickle=True unpickles the file, so it must come from a trusted source
+            data = np.load(os.path.join(self.quadtree_path, img_file_name[:-4] + ".npy"), allow_pickle=True).item()
+            orig_quadtree = data["quadtree"]
+            orig_graph = data["graph"]
+            image_points = data["points"]
+            # Annotations are rebuilt from the stored points and carry only a "point" key
+            new_anns = []
+            for pt in image_points:
+                new_ann = {
+                    "point": [int(pt[0]), int(pt[1])],
+                }
+                new_anns.append(new_ann)
+            target = {"image_id": img_id, "annotations": new_anns}
+            # Cast quadtree positions to int tuples
+            quadtree = {}
+            for k, v in orig_quadtree.items():
+                new_k = k
+                new_v = []
+                for pos in v:
+                    new_pos = (int(pos[0]), int(pos[1]))
+                    new_v.append(new_pos)
+                quadtree[new_k] = new_v
+            # Cast graph keys and neighbours to int tuples; (-1, -1) marks a missing neighbour
+            new_graph = {}
+            for k, v in orig_graph.items():
+                new_k = (int(k[0]), int(k[1]))
+                new_v = []
+                for adj in v:
+                    if adj == (-1, -1):
+                        new_v.append((-1, -1))
+                    else:
+                        new_v.append((int(adj[0]), int(adj[1])))
+                new_graph[new_k] = new_v
+            # Order annotations by quadtree layer, matching each layer position to the first
+            # annotation within an L1 distance of 2
+            target_layers = []
+            for layer, layer_points in quadtree.items():
+                target_layer = []
+                for layer_point in layer_points:
+                    for target_i in target["annotations"]:
+                        if l1_dist(target_i["point"], list(layer_point)) <= 2:
+                            target_layer.append(target_i)
+                            break
+                target_layers.extend(target_layer)
+            # Start offset of each layer, counted in quadtree positions
+            layer_indices = []
+            count = 0
+            for k, v in quadtree.items():
+                if k == 0:
+                    layer_indices.append(0)
+                else:
+                    layer_indices.append(count)
+                count += len(v)
+            image_id = torch.tensor([int(img_id)])
+            points = [obj["point"] for obj in target_layers]
+            # Edge codes come from a per-image JSON whose keys are "(x, y)" strings;
+            # the keys are parsed back into int tuples for lookup by point
+            with open(os.path.join(self.edgecode_path, img_file_name[:-4] + ".json"), "r") as f:
+                edge2code = json.load(f)
+                edge2code = {
+                    tuple(map(lambda x: int(float(x)), key.strip("()").split(", "))): value
+                    for key, value in edge2code.items()
+                }
+            edges = [edge2code[(int(pt[0]), int(pt[1]))] for pt in points]
+            points = torch.as_tensor(points, dtype=torch.int64).reshape(-1, 2)
+            edges = torch.tensor(edges, dtype=torch.int64)
+            # annotations
+            target = {}
+            target["edges"] = edges
+            target["image_id"] = image_id
+            target["file_name"] = img_file_name
+            target["size"] = torch.as_tensor([img.size[1], img.size[0]])
+            # get semantic annotations
+            # NOTE(review): the dicts in target_layers are built above with only a "point" key,
+            # so obj["semantic"] looks like it raises KeyError unless disable_sem_info is
+            # true — confirm and source the semantics if this path is needed
+            if not self.disable_sem_info:
+                semantic_left_up = [semantics_dict[obj["semantic"][0]] for obj in target_layers]
+                semantic_right_up = [semantics_dict[obj["semantic"][1]] for obj in target_layers]
+                semantic_right_down = [semantics_dict[obj["semantic"][2]] for obj in target_layers]
+                semantic_left_down = [semantics_dict[obj["semantic"][3]] for obj in target_layers]
+                semantic_left_up = torch.tensor(semantic_left_up, dtype=torch.int64)
+                semantic_right_up = torch.tensor(semantic_right_up, dtype=torch.int64)
+                semantic_right_down = torch.tensor(semantic_right_down, dtype=torch.int64)
+                semantic_left_down = torch.tensor(semantic_left_down, dtype=torch.int64)
+                target["semantic_left_up"] = semantic_left_up
+                target["semantic_right_up"] = semantic_right_up
+                target["semantic_right_down"] = semantic_right_down
+                target["semantic_left_down"] = semantic_left_down
+            # get image
+            img = F.to_tensor(img)
+            img = F.normalize(img, mean=mean, std=std)
+            target["unnormalized_points"] = points
+            # normalize points by (width, height) of the image tensor
+            points = points / torch.tensor([img.shape[2], img.shape[1]], dtype=torch.float32)
+            target["points"] = points
+            target["layer_indices"] = torch.tensor(layer_indices)
+            # Force exactly four neighbour slots per point: pad with (-1, -1) or truncate
+            for pt, neighbors in new_graph.items():
+                if len(neighbors) < 4:
+                    new_graph[pt].extend([(-1, -1)] * (4 - len(neighbors)))
+                elif len(neighbors) > 4:
+                    new_graph[pt] = neighbors[:4]
+            target["graph"] = graph_to_tensor(new_graph)
+            return img, target
+    def __len__(self):
+        """Return the number of entries in ``self.ids``."""
+        return len(self.ids)

data_preprocess/raster2graph/image_process.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import argparse
+import json
+import os
+import numpy as np
+from PIL import Image
+from tqdm import tqdm
+parser = argparse.ArgumentParser("Preprocess LIFULL HOMES DATA (HIGH RESOLUTION) Dataset")
+parser.add_argument("--data_root", type=str, default=r"R2G_hr_dataset/", help="path to the root folder of the dataset")
+args = parser.parse_args()
+SIZE = 512
+MARGIN = 64
+np.set_printoptions(threshold=np.inf, linewidth=999999)
+# original_images_path = r'E:/LIFULL HOMES DATA (HIGH RESOLUTION)/photo-rent-madori-full-00'
+original_images_path = args.data_root
+with open(f"{args.data_root}/annot_json/instances_train.json", mode="r") as f_train:
+    train_jpgs = [_["file_name"] for _ in json.load(f_train)["images"]]
+with open(f"{args.data_root}/annot_json/instances_val.json", mode="r") as f_val:
+    val_jpgs = [_["file_name"] for _ in json.load(f_val)["images"]]
+with open(f"{args.data_root}/instances_test.json", mode="r") as f_test:
+    test_jpgs = [_["file_name"] for _ in json.load(f_test)["images"]]
+jpgs = {"train": train_jpgs, "val": val_jpgs, "test": test_jpgs}
+start_idx = 0
+for mode in ["train", "val", "test"]:
+    output_dir = "./" + mode
+    os.makedirs(output_dir, exist_ok=True)
+    for fnames in [jpgs[mode]]:
+        for i in tqdm(range(len(fnames))):
+            fn = fnames[i].replace(".jpg", "")
+            if os.path.exists(os.path.join(f"{args.data_root}/annot_npy", fn + ".npy")) and os.path.exists(
+                os.path.join(f"{args.data_root}/original_vector_boundary", fn + ".npy")
+            ):
+                img_original = Image.open(os.path.join(original_images_path, fn.replace("-", "/") + ".jpg"))
+                boundary_path = os.path.join(f"{args.data_root}/original_vector_boundary", fn + ".npy")
+                boundary = np.load(boundary_path, allow_pickle=True).item()
+                x_min = boundary["x_min"]
+                x_max = boundary["x_max"]
+                y_min = boundary["y_min"]
+                y_max = boundary["y_max"]
+                width = x_max - x_min
+                mid_width = (x_max + x_min) / 2
+                height = y_max - y_min
+                mid_height = (y_max + y_min) / 2
+                if width > height:
+                    scale = (SIZE - 2 * MARGIN) / width
+                else:
+                    scale = (SIZE - 2 * MARGIN) / height
+                # print(x_min, y_min, x_max, y_max, width, height, scale)
+                original_width, original_height = img_original.size
+                new_width = int(original_width * scale)
+                new_height = int(original_height * scale)
+                scaled_image = img_original.resize((new_width, new_height), Image.Resampling.LANCZOS)
+                canvas = Image.new("RGB", (512, 512), (255, 255, 255))
+                # print(new_width, new_height)
+                x_topleft_offset = int(512 / 2 - mid_width * scale)
+                y_topleft_offset = int(512 / 2 - mid_height * scale)
+                canvas.paste(scaled_image, (x_topleft_offset, y_topleft_offset))
+                canvas.save(os.path.join(output_dir, fn + ".png"))
+            start_idx += 1

data_preprocess/raster2graph/util/data_utils.py ADDED Viewed

	@@ -0,0 +1,966 @@

+import copy
+import random
+import torch
+from util.edges_utils import get_edges_alldirections_rev
+from util.math_utils import clip
+from util.mean_std import mean, std
+def data_to_cuda(samples, targets):
+    return samples.to(torch.device("cuda")), [
+        {k: v if isinstance(v, str) else v.to(torch.device("cuda")) for k, v in t.items()} for t in targets
+    ]
+def get_random_layer_targets(targets, gt_layer):
+    random_targets = []
+    for batch_i, target_i in enumerate(targets):
+        random_layer_targets_i = copy.deepcopy(target_i)
+        if gt_layer[batch_i] != len(random_layer_targets_i["layer_indices"]) - 1:
+            start = random_layer_targets_i["layer_indices"][gt_layer[batch_i]].item()
+            end = random_layer_targets_i["layer_indices"][gt_layer[batch_i] + 1].item()
+        else:
+            start = random_layer_targets_i["layer_indices"][gt_layer[batch_i]].item()
+            end = len(random_layer_targets_i["points"])
+        random_points_i = random_layer_targets_i["points"][start:end, :]
+        random_edges_i = random_layer_targets_i["edges"][start:end]
+        random_unnormalized_points_i = random_layer_targets_i["unnormalized_points"][start:end, :]
+        random_layer_targets_i["points"] = random_points_i
+        random_layer_targets_i["edges"] = random_edges_i
+        random_layer_targets_i["unnormalized_points"] = random_unnormalized_points_i
+        del random_layer_targets_i["layer_indices"]
+        random_targets.append(random_layer_targets_i)
+    return random_targets
+def random_layers(targets):
+    return [random.randint(0, len(targets[i]["layer_indices"]) - 1) for i in range(len(targets))]
+def get_given_layers_random_region(targets, graphs):
+    random_regions = []
+    for bs_i in range(len(targets)):
+        # target
+        targets_i = targets[bs_i]
+        graphs_i = graphs[bs_i]
+        # level 0: start
+        start_i = tuple(targets_i["unnormalized_points"][0].tolist())
+        # sampled prob: for a neighborhood, each node is sampled by this probability
+        # sampled_prob = 0.0001
+        # sampled_prob = random.random()
+        sampled_prob = 0.5
+        # sampled_prob = 1
+        # sampled nodes
+        sampled_points = {}
+        for point_tensor in targets_i["unnormalized_points"]:
+            pos = tuple(point_tensor.tolist())
+            sampled_points[pos] = 0
+        # edges of sampled nodes
+        sampled_edges = []
+        # nodes number of subgraph
+        # sampled_amount = random.randint(0, len(sampled_points) + 2)
+        # if sampled_amount in [len(sampled_points) + 1]:
+        #     sampled_amount = 0
+        # if sampled_amount in [len(sampled_points) + 2]:
+        #     sampled_amount = len(sampled_points)
+        sampled_amount = random.randint(0, len(sampled_points))  # TODO: 1~len(sampled_points)
+        # Note that when sampled_prob = 1, the number of sampled nodes must be in 'layer_indices' or be the total number of points to ensure that the entire layers is sampled.
+        # equal to BFS
+        if sampled_prob == 1:
+            l = targets_i["layer_indices"].tolist()
+            l.append(len(sampled_points))
+            l.append(0)
+            l.append(len(sampled_points))
+            sampled_amount = l[random.randint(0, len(l) - 1)]
+        # start sampling
+        if sampled_amount == 0:
+            random_regions.append((sampled_points, sampled_edges))
+            continue
+        sampled_points[start_i] = 1
+        if sampled_amount == 1:
+            random_regions.append((sampled_points, sampled_edges))
+            continue
+        max_iterations = max(1000, 10 * sampled_amount)  # Ensure at least 1000 iterations
+        iteration_count = 0
+        while sum(sampled_points.values()) < sampled_amount:
+            iteration_count += 1
+            if iteration_count > max_iterations:
+                print("Reached maximum iterations, breaking to avoid infinite loop.")
+                break
+            all_sampled_points = set([k for k, v in sampled_points.items() if v == 1])
+            all_sampled_points_adjs = set()
+            for sampled_point in all_sampled_points:
+                adj = set([(int(x[0]), int(x[1])) for x in graphs_i[sampled_point]])
+                all_sampled_points_adjs = all_sampled_points_adjs.union(adj)
+            if (-1, -1) in all_sampled_points_adjs:
+                all_sampled_points_adjs.remove((-1, -1))
+            all_sampled_points_adjs = list(all_sampled_points_adjs.difference(all_sampled_points))
+            if not all_sampled_points_adjs:  # If no more adjacent points to sample, break
+                print("No more adjacent points to sample, breaking the loop.")
+                break
+            # shuffle the last layer to let it uniform (no bias of sample order)
+            random.shuffle(all_sampled_points_adjs)
+            # determine whether to sample nodes in each neighborhood based on probability
+            for all_sampled_points_adj_index, all_sampled_points_adj in enumerate(all_sampled_points_adjs):
+                all_sampled_points = set([k for k, v in sampled_points.items() if v == 1])
+                if sum(sampled_points.values()) == sampled_amount:
+                    break
+                else:
+                    if 1:
+                        if random.random() < sampled_prob:
+                            sampled_points[all_sampled_points_adj] = 1
+                            # sample edges
+                            all_pos1s = graphs_i[all_sampled_points_adj]
+                            pos2 = all_sampled_points_adj
+                            for pos1 in all_pos1s:
+                                if pos1 in all_sampled_points:
+                                    sampled_edges.append((pos1, pos2))
+                        else:
+                            sampled_points[all_sampled_points_adj] = 0
+        random_regions.append((sampled_points, sampled_edges))
+    return random_regions
+def get_random_region_targets(given_layers, graphs, targets):
+    random_region_targets = []
+    for bs_i in range(len(targets)):
+        random_region_target = {}
+        targets_i = targets[bs_i]
+        graphs_i = graphs[bs_i]
+        given_layers_i = given_layers[bs_i]
+        sampled_points_i, sampled_edges_i = given_layers_i
+        if sum(sampled_points_i.values()) == 0:
+            random_region_target["edges"] = targets_i["edges"][:1]
+            if "semantic_left_up" in targets_i:
+                random_region_target["semantic_left_up"] = targets_i["semantic_left_up"][:1]
+                random_region_target["semantic_right_up"] = targets_i["semantic_right_up"][:1]
+                random_region_target["semantic_right_down"] = targets_i["semantic_right_down"][:1]
+                random_region_target["semantic_left_down"] = targets_i["semantic_left_down"][:1]
+            random_region_target["image_id"] = targets_i["image_id"]
+            random_region_target["size"] = targets_i["size"]
+            random_region_target["unnormalized_points"] = targets_i["unnormalized_points"][:1]
+            random_region_target["points"] = targets_i["points"][:1]
+            random_region_target["last_edges"] = torch.zeros(
+                (1,), dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            random_region_target["this_edges"] = torch.zeros(
+                (1,), dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            random_region_targets.append(random_region_target)
+        elif 1 <= sum(sampled_points_i.values()) <= len(sampled_points_i) - 1:
+            sampled_points_i_given = set([k for k, v in sampled_points_i.items() if v == 1])
+            unnormalized_points = []
+            for point, sampled_or_not in sampled_points_i.items():
+                if sampled_or_not == 0:
+                    adjs = graphs_i[point]
+                    for adj in adjs:
+                        if adj in sampled_points_i_given:
+                            unnormalized_points.append(point)
+                            break
+            if len(unnormalized_points) == 0:
+                random_region_target["edges"] = targets_i["edges"][:1]
+                if "semantic_left_up" in targets_i:
+                    random_region_target["semantic_left_up"] = targets_i["semantic_left_up"][:1]
+                    random_region_target["semantic_right_up"] = targets_i["semantic_right_up"][:1]
+                    random_region_target["semantic_right_down"] = targets_i["semantic_right_down"][:1]
+                    random_region_target["semantic_left_down"] = targets_i["semantic_left_down"][:1]
+                random_region_target["image_id"] = targets_i["image_id"]
+                random_region_target["size"] = targets_i["size"]
+                random_region_target["unnormalized_points"] = targets_i["unnormalized_points"][:1]
+                random_region_target["points"] = targets_i["points"][:1]
+                random_region_target["last_edges"] = torch.zeros(
+                    (1,), dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+                )
+                random_region_target["this_edges"] = torch.zeros(
+                    (1,), dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+                )
+                random_region_targets.append(random_region_target)
+                continue
+            indices_for_semantic = []
+            for unnormalized_point in unnormalized_points:
+                for ind, every_point in enumerate(targets_i["unnormalized_points"]):
+                    every_point = tuple(every_point.tolist())
+                    if (
+                        abs(every_point[0] - unnormalized_point[0]) <= 2
+                        and abs(every_point[1] - unnormalized_point[1]) <= 2
+                    ):
+                        indices_for_semantic.append(ind)
+            # assert len(unnormalized_points) == len(indices_for_semantic)
+            semantic_left_up = []
+            semantic_right_up = []
+            semantic_right_down = []
+            semantic_left_down = []
+            if "semantic_left_up" in targets_i:
+                for ind in indices_for_semantic:
+                    semantic_left_up.append(targets_i["semantic_left_up"][ind].item())
+                    semantic_right_up.append(targets_i["semantic_right_up"][ind].item())
+                    semantic_right_down.append(targets_i["semantic_right_down"][ind].item())
+                    semantic_left_down.append(targets_i["semantic_left_down"][ind].item())
+            edges = []
+            for unnormalized_point in unnormalized_points:
+                edge = ""
+                adjs = graphs_i[unnormalized_point]
+                for adj in adjs:
+                    if adj != (-1, -1):
+                        edge += "1"
+                    else:
+                        edge += "0"
+                edge = get_edges_alldirections_rev(edge)
+                edges.append(edge)
+            last_edges = []
+            for unnormalized_point in unnormalized_points:
+                last_edge = ""
+                adjs = graphs_i[unnormalized_point]
+                for adj in adjs:
+                    if adj in sampled_points_i_given:
+                        last_edge += "1"
+                    else:
+                        last_edge += "0"
+                last_edge = get_edges_alldirections_rev(last_edge)
+                last_edges.append(last_edge)
+            this_edges = []
+            for unnormalized_point in unnormalized_points:
+                this_edge = ""
+                adjs = graphs_i[unnormalized_point]
+                for adj in adjs:
+                    if adj in unnormalized_points:
+                        this_edge += "1"
+                    else:
+                        this_edge += "0"
+                this_edge = get_edges_alldirections_rev(this_edge)
+                this_edges.append(this_edge)
+            random_region_target["edges"] = torch.tensor(
+                edges, dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            if "semantic_left_up" in targets_i:
+                random_region_target["semantic_left_up"] = torch.tensor(
+                    semantic_left_up,
+                    dtype=targets_i["semantic_left_up"].dtype,
+                    device=targets_i["semantic_left_up"].device,
+                )
+                random_region_target["semantic_right_up"] = torch.tensor(
+                    semantic_right_up,
+                    dtype=targets_i["semantic_right_up"].dtype,
+                    device=targets_i["semantic_right_up"].device,
+                )
+                random_region_target["semantic_right_down"] = torch.tensor(
+                    semantic_right_down,
+                    dtype=targets_i["semantic_right_down"].dtype,
+                    device=targets_i["semantic_right_down"].device,
+                )
+                random_region_target["semantic_left_down"] = torch.tensor(
+                    semantic_left_down,
+                    dtype=targets_i["semantic_left_down"].dtype,
+                    device=targets_i["semantic_left_down"].device,
+                )
+            random_region_target["image_id"] = targets_i["image_id"]
+            random_region_target["size"] = targets_i["size"]
+            # NEW
+            # if len(unnormalized_points) == 0:
+            #     print("Warning: unnormalized_points is empty. Initializing to default value.")
+            #     unnormalized_points = torch.zeros((1, 2), dtype=targets_i['unnormalized_points'].dtype,
+            #                                     device=targets_i['unnormalized_points'].device)
+            # else:
+            # random_region_target['unnormalized_points'] = torch.tensor(unnormalized_points,
+            #                                                         dtype=targets_i['unnormalized_points'].dtype,
+            #                                                         device=targets_i['unnormalized_points'].device)
+            random_region_target["unnormalized_points"] = torch.tensor(
+                unnormalized_points,
+                dtype=targets_i["unnormalized_points"].dtype,
+                device=targets_i["unnormalized_points"].device,
+            )
+            random_region_target["points"] = (
+                torch.tensor(unnormalized_points, dtype=targets_i["points"].dtype, device=targets_i["points"].device)
+                / targets_i["size"]
+            )
+            random_region_target["last_edges"] = torch.tensor(
+                last_edges, dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            random_region_target["this_edges"] = torch.tensor(
+                this_edges, dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            random_region_targets.append(random_region_target)
+        else:
+            random_region_target["edges"] = 16 * torch.ones(
+                targets_i["edges"][:1].shape, dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            if "semantic_left_up" in targets_i:
+                random_region_target["semantic_left_up"] = 11 * torch.ones(
+                    targets_i["semantic_left_up"][:1].shape,
+                    dtype=targets_i["semantic_left_up"].dtype,
+                    device=targets_i["semantic_left_up"].device,
+                )
+                random_region_target["semantic_right_up"] = 11 * torch.ones(
+                    targets_i["semantic_right_up"][:1].shape,
+                    dtype=targets_i["semantic_right_up"].dtype,
+                    device=targets_i["semantic_right_up"].device,
+                )
+                random_region_target["semantic_right_down"] = 11 * torch.ones(
+                    targets_i["semantic_right_down"][:1].shape,
+                    dtype=targets_i["semantic_right_down"].dtype,
+                    device=targets_i["semantic_right_down"].device,
+                )
+                random_region_target["semantic_left_down"] = 11 * torch.ones(
+                    targets_i["semantic_left_down"][:1].shape,
+                    dtype=targets_i["semantic_left_down"].dtype,
+                    device=targets_i["semantic_left_down"].device,
+                )
+            random_region_target["image_id"] = targets_i["image_id"]
+            random_region_target["size"] = targets_i["size"]
+            random_region_target["unnormalized_points"] = 505 * torch.ones(
+                targets_i["unnormalized_points"][:1].shape,
+                dtype=targets_i["unnormalized_points"][:1].dtype,
+                device=targets_i["unnormalized_points"][:1].device,
+            )
+            random_region_target["points"] = (
+                505
+                * torch.ones(
+                    targets_i["unnormalized_points"][:1].shape,
+                    dtype=targets_i["points"][:1].dtype,
+                    device=targets_i["points"][:1].device,
+                )
+            ) / targets_i["size"]
+            random_region_target["last_edges"] = 16 * torch.ones(
+                (1,), dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            random_region_target["this_edges"] = 16 * torch.ones(
+                (1,), dtype=targets_i["edges"].dtype, device=targets_i["edges"].device
+            )
+            random_region_targets.append(random_region_target)
+    return random_region_targets
+def random_pertubation(sampled_points_i, sampled_edges_i):
+    random_pertube_map = {}
+    sigma = 2
+    pertube_threshold = 5
+    for sampled_point in sampled_points_i:
+        random_pertube_map[sampled_point] = (
+            sampled_point[0] + clip(int(random.gauss(0, sigma)), -1 * pertube_threshold, pertube_threshold),
+            sampled_point[1] + clip(int(random.gauss(0, sigma)), -1 * pertube_threshold, pertube_threshold),
+        )
+    new_sampled_points_i = {}
+    new_sampled_edges_i = []
+    for sampled_point in sampled_points_i:
+        new_sampled_points_i[random_pertube_map[sampled_point]] = sampled_points_i[sampled_point]
+    for pos1, pos2 in sampled_edges_i:
+        new_sampled_edges_i.append((random_pertube_map[pos1], random_pertube_map[pos2]))
+    return new_sampled_points_i, new_sampled_edges_i
+def draw_given_layers_on_tensors_random_region(given_layers, tensors, graphs):
+    """draw 9*9 yellow squares and width 2 blue lines"""
+    tensors_list = []
+    unnormalized_list = []
+    for i in range(len(given_layers)):
+        temp_tensor = tensors[i]
+        temp_tensor_0 = (temp_tensor[0] * std[0] + mean[0]) * 255
+        temp_tensor_1 = (temp_tensor[1] * std[1] + mean[1]) * 255
+        temp_tensor_2 = (temp_tensor[2] * std[2] + mean[2]) * 255
+        rectangle_radius = 5
+        # end sign
+        endsign = (505, 505)
+        valid_violet_endsign_up = endsign[1] - rectangle_radius
+        valid_violet_endsign_down = endsign[1] + rectangle_radius
+        valid_violet_endsign_left = endsign[0] - rectangle_radius
+        valid_violet_endsign_right = endsign[0] + rectangle_radius
+        temp_tensor_0[
+            valid_violet_endsign_up : valid_violet_endsign_down + 1,
+            valid_violet_endsign_left : valid_violet_endsign_right + 1,
+        ] = 255
+        temp_tensor_1[
+            valid_violet_endsign_up : valid_violet_endsign_down + 1,
+            valid_violet_endsign_left : valid_violet_endsign_right + 1,
+        ] = 0
+        temp_tensor_2[
+            valid_violet_endsign_up : valid_violet_endsign_down + 1,
+            valid_violet_endsign_left : valid_violet_endsign_right + 1,
+        ] = 255
+        sampled_points_i, sampled_edges_i = given_layers[i]
+        sampled_points_i, sampled_edges_i = random_pertubation(sampled_points_i, sampled_edges_i)
+        given_points = [k for k, v in sampled_points_i.items() if v == 1]
+        for j, pos in enumerate(given_points):
+            valid_yellow_pos_up = int(pos[1] - rectangle_radius) if (pos[1] - rectangle_radius) >= 0 else 0
+            valid_yellow_pos_down = (
+                int(pos[1] + rectangle_radius)
+                if (pos[1] + rectangle_radius) < temp_tensor.shape[2]
+                else temp_tensor.shape[2] - 1
+            )
+            valid_yellow_pos_left = int(pos[0] - rectangle_radius) if (pos[0] - rectangle_radius) >= 0 else 0
+            valid_yellow_pos_right = (
+                int(pos[0] + rectangle_radius)
+                if (pos[0] + rectangle_radius) < temp_tensor.shape[1]
+                else temp_tensor.shape[1] - 1
+            )
+            temp_tensor_0[
+                valid_yellow_pos_up : valid_yellow_pos_down + 1, valid_yellow_pos_left : valid_yellow_pos_right + 1
+            ] = 255
+            temp_tensor_1[
+                valid_yellow_pos_up : valid_yellow_pos_down + 1, valid_yellow_pos_left : valid_yellow_pos_right + 1
+            ] = 255
+            temp_tensor_2[
+                valid_yellow_pos_up : valid_yellow_pos_down + 1, valid_yellow_pos_left : valid_yellow_pos_right + 1
+            ] = 0
+        # draw blue lines
+        line_width = 2
+        for edge in sampled_edges_i:
+            pos1 = (int(edge[0][0]), int(edge[0][1]))
+            pos2 = (int(edge[1][0]), int(edge[1][1]))
+            if abs(pos1[0] - pos2[0]) < abs(pos1[1] - pos2[1]):
+                if pos1[1] > pos2[1]:
+                    temp_tensor_0[
+                        pos2[1] : pos1[1] + 1,
+                        int((pos1[0] + pos2[0]) / 2) - int(line_width / 2) : int((pos1[0] + pos2[0]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                    ] = 0
+                    temp_tensor_1[
+                        pos2[1] : pos1[1] + 1,
+                        int((pos1[0] + pos2[0]) / 2) - int(line_width / 2) : int((pos1[0] + pos2[0]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                    ] = 0
+                    temp_tensor_2[
+                        pos2[1] : pos1[1] + 1,
+                        int((pos1[0] + pos2[0]) / 2) - int(line_width / 2) : int((pos1[0] + pos2[0]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                    ] = 255
+                else:
+                    temp_tensor_0[
+                        pos1[1] : pos2[1] + 1,
+                        int((pos2[0] + pos1[0]) / 2) - int(line_width / 2) : int((pos2[0] + pos1[0]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                    ] = 0
+                    temp_tensor_1[
+                        pos1[1] : pos2[1] + 1,
+                        int((pos2[0] + pos1[0]) / 2) - int(line_width / 2) : int((pos2[0] + pos1[0]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                    ] = 0
+                    temp_tensor_2[
+                        pos1[1] : pos2[1] + 1,
+                        int((pos2[0] + pos1[0]) / 2) - int(line_width / 2) : int((pos2[0] + pos1[0]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                    ] = 255
+            else:
+                if pos1[0] > pos2[0]:
+                    temp_tensor_0[
+                        int((pos1[1] + pos2[1]) / 2) - int(line_width / 2) : int((pos1[1] + pos2[1]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                        pos2[0] : pos1[0] + 1,
+                    ] = 0
+                    temp_tensor_1[
+                        int((pos1[1] + pos2[1]) / 2) - int(line_width / 2) : int((pos1[1] + pos2[1]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                        pos2[0] : pos1[0] + 1,
+                    ] = 0
+                    temp_tensor_2[
+                        int((pos1[1] + pos2[1]) / 2) - int(line_width / 2) : int((pos1[1] + pos2[1]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                        pos2[0] : pos1[0] + 1,
+                    ] = 255
+                else:
+                    temp_tensor_0[
+                        int((pos2[1] + pos1[1]) / 2) - int(line_width / 2) : int((pos2[1] + pos1[1]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                        pos1[0] : pos2[0] + 1,
+                    ] = 0
+                    temp_tensor_1[
+                        int((pos2[1] + pos1[1]) / 2) - int(line_width / 2) : int((pos2[1] + pos1[1]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                        pos1[0] : pos2[0] + 1,
+                    ] = 0
+                    temp_tensor_2[
+                        int((pos2[1] + pos1[1]) / 2) - int(line_width / 2) : int((pos2[1] + pos1[1]) / 2)
+                        + int(line_width / 2)
+                        + 1,
+                        pos1[0] : pos2[0] + 1,
+                    ] = 255
+        unnormalized = torch.stack((temp_tensor_0, temp_tensor_1, temp_tensor_2), dim=0)
+        unnormalized_list.append(unnormalized)
+        temp_tensor_0_renorm = ((temp_tensor_0 / 255) - mean[0]) / std[0]
+        temp_tensor_1_renorm = ((temp_tensor_1 / 255) - mean[1]) / std[1]
+        temp_tensor_2_renorm = ((temp_tensor_2 / 255) - mean[2]) / std[2]
+        temp_tensor = torch.stack([temp_tensor_0_renorm, temp_tensor_1_renorm, temp_tensor_2_renorm], dim=0)
+        tensors_list.append(temp_tensor)
+    return torch.stack(tensors_list, dim=0), torch.stack(unnormalized_list, dim=0)
def initialize_tensors(tensors):
    """Stamp the end-sign marker onto every image of a normalized batch.

    Each image is de-normalized per channel with the module-level ``mean`` and
    ``std`` (scaled to 0-255), a square marker is painted around pixel
    (505, 505), and the image is normalized again.

    Returns a pair ``(normalized_batch, unnormalized_batch)``, both stacked
    along a new leading dimension.
    """
    # Per the original author's note, predicting this marker is what
    # terminates the autoregressive iteration.
    endsign = (505, 505)
    rectangle_radius = 5  # marker side is 2 * 5 + 1 = 11 pixels
    marker_rows = slice(endsign[1] - rectangle_radius, endsign[1] + rectangle_radius + 1)
    marker_cols = slice(endsign[0] - rectangle_radius, endsign[0] + rectangle_radius + 1)
    marker_rgb = (255, 0, 255)

    normalized_batch = []
    raw_batch = []
    for image in tensors:
        # Undo the normalization so the marker can be written in 0-255 units.
        planes = [(image[channel] * std[channel] + mean[channel]) * 255 for channel in range(3)]
        for plane, value in zip(planes, marker_rgb):
            plane[marker_rows, marker_cols] = value
        raw_batch.append(torch.stack(tuple(planes), dim=0))
        renormalized = [
            ((plane / 255) - mean[channel]) / std[channel] for channel, plane in enumerate(planes)
        ]
        normalized_batch.append(torch.stack(renormalized, dim=0))
    return torch.stack(normalized_batch, dim=0), torch.stack(raw_batch, dim=0)
def l1_dist(pos1, pos2):
    """Return the Manhattan (L1) distance between two 2-D positions."""
    horizontal_gap = abs(pos1[0] - pos2[0])
    vertical_gap = abs(pos1[1] - pos2[1])
    return horizontal_gap + vertical_gap
def delete_graphs(targets):
    """Return deep copies of ``targets`` with the ``"graph"`` entry removed.

    The input targets are left untouched; a missing ``"graph"`` key raises
    ``KeyError``.
    """
    stripped_targets = []
    for original in targets:
        clone = copy.deepcopy(original)
        del clone["graph"]
        stripped_targets.append(clone)
    return stripped_targets
def delete_graphs_and_unnormpoints(targets):
    """Return deep copies of ``targets`` without ``"graph"`` and ``"unnormalized_points"``.

    The input targets are left untouched; a missing key raises ``KeyError``.
    """
    stripped_targets = []
    for original in targets:
        clone = copy.deepcopy(original)
        del clone["graph"]
        del clone["unnormalized_points"]
        stripped_targets.append(clone)
    return stripped_targets
def get_remove_point(this_preds, dist_threshold):
    """Pick one prediction to drop from the first pair that lies too close together.

    Pairs are scanned in nested order.  Two predictions with different
    coordinates whose Chebyshev distance is at most ``dist_threshold`` form a
    conflict; the one with the lower ``"scores"`` value is returned, and a tie
    is broken with ``random.randint``.  Returns ``None`` when no pair conflicts.
    """
    for first in this_preds:
        first_xy = first["points"].tolist()
        for second in this_preds:
            second_xy = second["points"].tolist()
            # Identical coordinates (including a point paired with itself) never conflict.
            if first_xy[0] == second_xy[0] and first_xy[1] == second_xy[1]:
                continue
            dist_chebyshev = max(
                abs(first_xy[0] - second_xy[0]),
                abs(first_xy[1] - second_xy[1]),
            )
            if not dist_chebyshev <= dist_threshold:
                continue
            first_confidence = first["scores"].item()
            second_confidence = second["scores"].item()
            if first_confidence < second_confidence:
                return first
            if second_confidence < first_confidence:
                return second
            return (first, second)[random.randint(0, 1)]
    return None
def point_inside(point, points_list):
    """Return True if some entry of ``points_list`` has the same coordinates as ``point``.

    Coordinates are taken from each entry's ``"points"`` value via ``.tolist()``.
    """
    wanted = tuple(point["points"].tolist())
    return any(wanted == tuple(candidate["points"].tolist()) for candidate in points_list)
def remove_points(need_to_remove_in_last_edges, this_preds):
    """Return the predictions whose coordinates match none of the points to remove.

    Matching is by coordinate value (each entry's ``"points"`` converted with
    ``.tolist()``), not by object identity.  A new list is returned; the inputs
    are not modified.
    """

    def _coords(entry):
        return tuple(entry["points"].tolist())

    survivors = []
    for candidate in this_preds:
        candidate_xy = _coords(candidate)
        is_marked = any(candidate_xy == _coords(marked) for marked in need_to_remove_in_last_edges)
        if not is_marked:
            survivors.append(candidate)
    return survivors
def nms(this_preds):
    """Repeatedly drop conflicting predictions until none lie within 5 px of each other.

    Conflicts are found with ``get_remove_point`` (Chebyshev distance threshold
    of 5) and removed with ``remove_points``.  Inputs with at most one
    prediction are returned as-is.
    """
    if len(this_preds) <= 1:
        return this_preds
    dist_threshold = 5
    victim = get_remove_point(this_preds, dist_threshold)
    while victim is not None:
        # remove_points builds a new list, so the caller's list is not mutated.
        this_preds = remove_points([victim], this_preds)
        victim = get_remove_point(this_preds, dist_threshold)
    return this_preds
def nms_givenpoints(this_preds, preds):
    """Drop new predictions that lie within 5 px of any previously accepted point.

    ``preds`` is an iterable of 3-item entries whose first item is a list of
    already accepted points.  A prediction is removed (via ``remove_points``)
    when its Chebyshev distance to any of those points is at most 5.
    """
    if len(this_preds) == 0:
        return this_preds
    all_given_points = [
        given_point for given_points, _last_edges, _this_edges in preds for given_point in given_points
    ]
    if len(all_given_points) == 0:
        return this_preds

    dist_threshold = 5
    # Iterate over a snapshot because this_preds is rebound while filtering.
    for candidate in copy.deepcopy(this_preds):
        for given_point in all_given_points:
            candidate_xy = tuple(candidate["points"].tolist())
            given_xy = tuple(given_point["points"].tolist())
            dist_chebyshev = max(abs(candidate_xy[0] - given_xy[0]), abs(candidate_xy[1] - given_xy[1]))
            if dist_chebyshev <= dist_threshold:
                this_preds = remove_points([candidate], this_preds)
                break
    return this_preds
def random_keep(this_preds):
    """Return a randomly sampled, non-empty subset of ``this_preds``.

    With the current keep probability (``random.random() < 1.01``) every
    draw succeeds, so all predictions are kept; the sampling loop is retained
    so the probability can be changed.  Inputs with at most one prediction are
    returned unchanged.
    """
    if len(this_preds) <= 1:
        return this_preds
    while True:
        # One random draw per prediction; retry if nothing survived.
        survivors = [point for point in this_preds if random.random() < 1.01]
        if len(survivors) > 0:
            return survivors
def is_stop(this_preds):
    """Classify whether iteration should stop given the current predictions.

    Returns 1 when there are no predictions (stop), 2 when any prediction has
    edge class 16 (normal termination), and 0 otherwise (continue).
    """
    if len(this_preds) == 0:
        return 1  # stop
    edge_classes = [pred["edges"].item() for pred in this_preds]
    if 16 in edge_classes:
        return 2  # normally terminate
    return 0  # not stop
def _fill_channels(channels, rows, cols, rgb):
    """Write one colour into the same rectangular window of every channel plane."""
    for plane, value in zip(channels, rgb):
        plane[rows, cols] = value


def _draw_axis_aligned_edge(channels, pos1, pos2, line_width, rgb):
    """Paint an edge as a horizontal or vertical bar between two integer positions."""
    half_width = int(line_width / 2)
    if abs(pos1[0] - pos2[0]) < abs(pos1[1] - pos2[1]):
        # Taller than wide: vertical bar centred on the mean x coordinate.
        centre = int((pos1[0] + pos2[0]) / 2)
        rows = slice(min(pos1[1], pos2[1]), max(pos1[1], pos2[1]) + 1)
        cols = slice(centre - half_width, centre + half_width + 1)
    else:
        # Otherwise: horizontal bar centred on the mean y coordinate.
        centre = int((pos1[1] + pos2[1]) / 2)
        rows = slice(centre - half_width, centre + half_width + 1)
        cols = slice(min(pos1[0], pos2[0]), max(pos1[0], pos2[0]) + 1)
    _fill_channels(channels, rows, cols, rgb)


def draw_preds_on_tensors(preds, tensors):
    """Render the latest predictions onto every image of a normalized batch.

    Only the last entry of ``preds`` is drawn.  It is unpacked as
    ``(this_preds, last_edges, this_edges)``: every predicted point becomes an
    11x11 square coloured (255, 255, 0), and every edge in both edge lists
    becomes an axis-aligned bar coloured (0, 0, 255).  Edges are pairs of
    entries whose ``"points"`` value gives the end-point coordinates.

    Images are de-normalized per channel with the module-level ``mean`` and
    ``std`` before drawing and normalized again afterwards.

    Returns a pair ``(normalized_batch, unnormalized_batch)``, both stacked
    along a new leading dimension.
    """
    rectangle_radius = 5
    line_width = 2
    point_rgb = (255, 255, 0)
    edge_rgb = (0, 0, 255)

    tensors_list = []
    unnormalized_list = []
    for temp_tensor in tensors:
        # Undo the normalization so colours can be written in 0-255 units.
        channels = [(temp_tensor[c] * std[c] + mean[c]) * 255 for c in range(3)]
        this_preds, last_edges, this_edges = preds[-1]

        for this_pred in this_preds:
            point = tuple(int(value) for value in this_pred["points"].tolist())
            rows = slice(point[1] - rectangle_radius, point[1] + rectangle_radius + 1)
            cols = slice(point[0] - rectangle_radius, point[0] + rectangle_radius + 1)
            _fill_channels(channels, rows, cols, point_rgb)

        # Both edge lists are drawn identically, older edges first.
        for edge_group in (last_edges, this_edges):
            for edge in edge_group:
                pos1 = tuple(int(value) for value in edge[0]["points"].tolist())
                pos2 = tuple(int(value) for value in edge[1]["points"].tolist())
                _draw_axis_aligned_edge(channels, pos1, pos2, line_width, edge_rgb)

        unnormalized_list.append(torch.stack(tuple(channels), dim=0))
        renormalized = [((plane / 255) - mean[c]) / std[c] for c, plane in enumerate(channels)]
        tensors_list.append(torch.stack(renormalized, dim=0))
    return torch.stack(tensors_list, dim=0), torch.stack(unnormalized_list, dim=0)
def edge_inside(edge, edges_list):
    """Return True if ``edges_list`` contains ``edge`` in either orientation.

    An edge is a pair of entries whose ``"points"`` values give the end-point
    coordinates; edges are compared by coordinates, ignoring direction.
    """
    start = tuple(edge[0]["points"].tolist())
    end = tuple(edge[1]["points"].tolist())
    for candidate in edges_list:
        candidate_start = tuple(candidate[0]["points"].tolist())
        candidate_end = tuple(candidate[1]["points"].tolist())
        same_direction = (start == candidate_start) and (end == candidate_end)
        if same_direction:
            return True
        reversed_direction = (start == candidate_end) and (end == candidate_start)
        if reversed_direction:
            return True
    return False
def remove_edge(edge, edges_list):
    """Return ``edges_list`` without the entries that equal ``edge`` in the same orientation.

    Edges are compared by end-point coordinates.  Unlike ``edge_inside``, a
    reversed copy of ``edge`` is NOT removed.
    NOTE(review): confirm the orientation-sensitive match is intended.
    """
    start = tuple(edge[0]["points"].tolist())
    end = tuple(edge[1]["points"].tolist())
    return [
        candidate
        for candidate in edges_list
        if not (
            start == tuple(candidate[0]["points"].tolist())
            and end == tuple(candidate[1]["points"].tolist())
        )
    ]
def get_edges_amount(preds):
    """Return the total number of edges stored across all entries of ``preds``.

    Each entry is a 3-item sequence ``(points, last_edges, this_edges)``; the
    lengths of both edge lists are summed.
    """
    return sum(len(last_edges) + len(this_edges) for _points, last_edges, this_edges in preds)
def get_reserve_preds(results, keep_confidence_threshold, targets):
    """Collect per-query predictions that have a non-zero edge class and a low score.

    A query is selected when ``results["edges"]`` is non-zero AND
    ``results["scores"]`` is at most ``keep_confidence_threshold``.
    NOTE(review): the comparison is ``<=``, i.e. these are the predictions at
    or below the threshold - confirm that direction is intended.

    Returns a list of dicts, one per selected query, holding that query's
    ``scores``, ``points``, ``last_edges``, ``this_edges`` and ``edges``
    entries plus ``size`` copied from ``targets[0]``.
    """
    nonzero_edge_idx = torch.where(results["edges"] != 0)[0]
    low_score_idx = torch.where(results["scores"] <= keep_confidence_threshold)[0]
    shared_idx = set(nonzero_edge_idx.tolist()) & set(low_score_idx.tolist())
    selected = torch.tensor(
        list(shared_idx),
        dtype=nonzero_edge_idx.dtype,
        device=nonzero_edge_idx.device,
    )

    copied_keys = ("scores", "points", "last_edges", "this_edges", "edges")
    reserve_preds = []
    for query_idx in selected:
        entry = {key: results[key][query_idx] for key in copied_keys}
        entry["size"] = targets[0]["size"]
        reserve_preds.append(entry)
    return reserve_preds

data_preprocess/raster2graph/util/edges_utils.py ADDED Viewed

	@@ -0,0 +1,46 @@

# Edge-class id (0-15) -> 4-character flag string.
# NOTE(review): presumably one '0'/'1' flag per direction - confirm against callers.
# The ids are not in binary order (e.g. 5 -> "0110", 14 -> "0101").
edges = dict(
    enumerate(
        (
            "0000",
            "0001",
            "0010",
            "0011",
            "0100",
            "0110",
            "0111",
            "1000",
            "1001",
            "1011",
            "1100",
            "1101",
            "1110",
            "1111",
            "0101",
            "1010",
        )
    )
)


def get_edges_alldirections(edges_class):
    """Return the 4-character flag string for an edge-class id (KeyError if unknown)."""
    flags = edges[edges_class]
    return flags
# 4-character flag string -> edge-class id (0-15); inverse of the `edges` table.
edges_rev = {
    flags: class_id
    for class_id, flags in enumerate(
        (
            "0000",
            "0001",
            "0010",
            "0011",
            "0100",
            "0110",
            "0111",
            "1000",
            "1001",
            "1011",
            "1100",
            "1101",
            "1110",
            "1111",
            "0101",
            "1010",
        )
    )
}


def get_edges_alldirections_rev(edges_class_rev):
    """Return the edge-class id for a 4-character flag string (KeyError if unknown)."""
    class_id = edges_rev[edges_class_rev]
    return class_id

data_preprocess/raster2graph/util/geom_utils.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import math
+from shapely.geometry import Polygon
def poly_iou(poly1: Polygon, poly2: Polygon):
    """Return the intersection-over-union of two polygons.

    If the direct computation raises (for example on an invalid geometry or a
    zero-area union), both polygons are buffered by 1 unit and the computation
    is retried once; an exception from the retry propagates to the caller.
    """

    def _area_ratio(first, second):
        intersection_area = first.intersection(second).area
        union_area = first.union(second).area
        return intersection_area / union_area

    try:
        return _area_ratio(poly1, poly2)
    except Exception:
        # Best-effort fallback: buffering repairs many degenerate geometries.
        return _area_ratio(poly1.buffer(1), poly2.buffer(1))
def is_clockwise_or_not(points):
    """Return True when the shoelace sum over consecutive point pairs is positive.

    Only consecutive pairs are summed; the polygon is NOT closed implicitly, so
    callers pass a ring whose last point repeats the first.
    """
    shoelace = 0
    for current, following in zip(points, points[1:]):
        shoelace += current[0] * following[1] - current[1] * following[0]
    return shoelace > 0
def x_axis_angle(y):
    """Return the angle of vector ``y`` in degrees, measured from the +x axis.

    Image coordinates are used: the direction (1, 0) counts as 0 degrees and
    angles grow counter-clockwise as seen on screen, up to (but normally below)
    360.  The y component is negated first to switch to a right-handed frame.
    """
    flipped = (y[0], -y[1])
    x_axis = (1, 0)
    dot = x_axis[0] * flipped[0] + x_axis[1] * flipped[1]
    length = (flipped[0] ** 2 + flipped[1] ** 2) ** 0.5
    # The small epsilon avoids a division by zero for the zero vector.
    cosine = dot / (length + 1e-8)
    unsigned_degrees = math.degrees(math.acos(cosine))
    if flipped[1] >= 0:
        return unsigned_degrees
    # Vector points into the lower half-plane: mirror the angle.
    return 360 - unsigned_degrees
def get_quadrant(angle):
    """Split the sweep between two angles into degrees per quadrant.

    ``angle`` holds two angles in degrees, each in ``[0, 360)``.  The quadrants
    are [0, 90), [90, 180), [180, 270) and [270, 360).

    * If ``angle[0] < angle[1]``, entry ``k`` of the result is the number of
      degrees of the interval ``[angle[0], angle[1]]`` inside quadrant ``k``.
    * Otherwise the same overlap is computed for ``[angle[1], angle[0]]`` and
      every entry is replaced by ``90 - overlap``, i.e. the part of each
      quadrant covered by the complementary sweep.

    Returns a 4-tuple.  Raises ``ValueError`` when either angle lies outside
    ``[0, 360)``; the previous if/elif chain matched no branch in that case and
    failed with ``UnboundLocalError``.
    """
    first, second = angle[0], angle[1]
    if not (0 <= first < 360 and 0 <= second < 360):
        raise ValueError(f"angles must lie in [0, 360), got ({first!r}, {second!r})")

    complementary = not first < second
    low, high = (second, first) if complementary else (first, second)
    # Overlap of [low, high] with each 90-degree quadrant, clamped at zero.
    overlap = tuple(
        max(0, min(high, start + 90) - max(low, start)) for start in (0, 90, 180, 270)
    )
    if complementary:
        return tuple(90 - part for part in overlap)
    return overlap
def find_which_angle_to_counterclockwise_rotate_from(t):
    """Return ``270 - t``, wrapped by a full turn (``630 - t``) when ``t`` exceeds 270."""
    reference = 630 if t > 270 else 270
    return reference - t
def counter_degree(d):
    """Return the opposite direction of ``d`` in degrees (``d`` shifted by half a turn)."""
    return d - 180 if d >= 180 else d + 180
def rotate_degree_clockwise_from_counter_degree(src_degree, dest_degree):
    """Return ``src_degree - dest_degree``, with 360 added when the difference is negative."""
    difference = src_degree - dest_degree
    if difference >= 0:
        return difference
    return 360 + difference
def rotate_degree_counterclockwise_from_counter_degree(src_degree, dest_degree):
    """Return ``dest_degree - src_degree``, with 360 added when the difference is negative."""
    difference = dest_degree - src_degree
    if difference >= 0:
        return difference
    return 360 + difference
def poly_area(points):
    """Return the signed area of a polygon using the trapezoid (shoelace) formula.

    The ring is closed implicitly (the last point connects back to the first).
    The sign of the result depends on the vertex order.
    """
    vertex_count = len(points)
    doubled_area = 0
    for index, current in enumerate(points):
        following = points[(index + 1) % vertex_count]
        doubled_area += (current[0] - following[0]) * (current[1] + following[1])
    return doubled_area / 2

data_preprocess/raster2graph/util/graph_utils.py ADDED Viewed

	@@ -0,0 +1,879 @@

+import copy
+import random
+import networkx as nx
+import numpy as np
+import torch
+from util.geom_utils import (
+    get_quadrant,
+    is_clockwise_or_not,
+    poly_area,
+    rotate_degree_counterclockwise_from_counter_degree,
+    x_axis_angle,
+)
+from util.metric_utils import get_results, get_results_float_with_semantic
def graph_to_tensor(graph):
    """Pack an adjacency dict into a single tensor.

    For every ``node -> neighbours`` item a 2-D tensor is built whose first
    row is the node and whose remaining rows are its neighbours; the per-node
    tensors are then stacked along a new leading dimension.  All nodes must
    therefore have the same number of neighbour slots.
    """
    per_node = []
    for node, neighbours in graph.items():
        rows = [list(entry) for entry in (node, *neighbours)]
        per_node.append(torch.tensor(rows))
    return torch.stack(per_node, dim=0)
def tensor_to_graph(tensor):
    """Unpack a tensor produced by ``graph_to_tensor`` back into an adjacency dict.

    For each slice along the first dimension, row 0 becomes the node key (a
    tuple of Python scalars) and rows 1-4 become its list of neighbour tuples.
    """
    graph = {}
    for node_block in tensor:
        node = tuple(component.item() for component in node_block[0])
        graph[node] = [tuple(neighbour) for neighbour in node_block[1:5].tolist()]
    return graph
def tensors_to_graphs_batch(tensors):
    """Convert each packed graph tensor in ``tensors`` with ``tensor_to_graph``."""
    graphs = []
    for packed in tensors:
        graphs.append(tensor_to_graph(packed))
    return graphs
def get_cycle_basis_and_semantic_deprecated(best_result):
    """Extract a cycle basis from the predicted graph and vote one semantic label per cycle.

    Points and edges come from ``get_results_float_with_semantic(best_result)``.
    Each point is indexed as ``p[0], p[1]`` (position) and ``p[2]..p[5]`` (four
    semantic labels).  Cycles are found with ``nx.cycle_basis``; for every
    cycle, each vertex votes for the labels attached to the quadrants covered
    by one of the two angles between its adjacent edges, and the label with the
    most votes wins (ties are broken randomly).

    Returns ``(d_rev, simple_cycles, results)``: the index -> point mapping, the
    cycles as lists of point indices, and one label per cycle.

    NOTE(review): the name marks this as deprecated; presumably superseded by
    ``get_cycle_basis_and_semantic`` - confirm before relying on it.
    """
    output_points, output_edges = get_results_float_with_semantic(best_result)
    # d: point -> index, d_rev: index -> point.
    d = {}
    for output_point_index, output_point in enumerate(output_points):
        d[output_point] = output_point_index
    d_rev = {}
    for output_point_index, output_point in enumerate(output_points):
        d_rev[output_point_index] = output_point
    # Edges re-expressed as pairs of point indices.
    es = []
    for output_edge in output_edges:
        es.append((d[output_edge[0]], d[output_edge[1]]))
    G = nx.Graph()
    for e in es:
        G.add_edge(e[0], e[1])
    # NOTE(review): nx.draw renders the graph as a side effect and its result is
    # unused here; presumably leftover debugging (the matching show call below
    # is commented out).
    nx.draw(G)
    # plt.show()
    simple_cycles = nx.cycle_basis(G)
    results = []
    for cycle_ind, cycle in enumerate(simple_cycles):
        # Close the ring by repeating the first point at the end.
        points = [d_rev[ind] for ind in cycle]
        points.append(points[0])
        # Normalize the winding: reverse whenever is_clockwise_or_not reports True.
        is_clockwise = is_clockwise_or_not([(p[0], p[1]) for p in points])
        if is_clockwise:
            points.reverse()
        # Cross product of consecutive edge vectors.  Indices wrap modulo
        # len(poses) - 1 because the last point duplicates the first.
        # NOTE(review): np.cross on 2-element vectors is deprecated in NumPy 2.0 - verify.
        cross_products = []
        poses = [(p[0], p[1]) for p in points]
        for ind in range(len(poses) - 1):
            ei = [
                poses[(ind + 1) % (len(poses) - 1)][0] - poses[ind][0],
                poses[(ind + 1) % (len(poses) - 1)][1] - poses[ind][1],
            ]
            eiplus1 = [
                poses[(ind + 2) % (len(poses) - 1)][0] - poses[(ind + 1) % (len(poses) - 1)][0],
                poses[(ind + 2) % (len(poses) - 1)][1] - poses[(ind + 1) % (len(poses) - 1)][1],
            ]
            cross_products.append(np.cross(ei, eiplus1).tolist())
        # Entry `ind` describes the turn at vertex ind + 1; rotate the list right
        # by one so that entry k lines up with vertex k.
        cross_products.insert(0, cross_products[-1])
        cross_products.pop(-1)
        # A zero cross product means the vertex is collinear with its
        # neighbours.  Jitter such vertices by at most 1e-6 with a random sign
        # and recompute until no zero remains.
        while 0 in cross_products:
            for point_ind, cross_product in enumerate(cross_products):
                if cross_product == 0:
                    if point_ind == 0:
                        p0 = copy.deepcopy(points[0])
                        points[0] = (
                            p0[0] + 0.000001 * random.random() * [-1, 1][random.randint(0, 1)],
                            p0[1] + 0.000001 * random.random() * [-1, 1][random.randint(0, 1)],
                            p0[2],
                            p0[3],
                            p0[4],
                            p0[5],
                        )
                        # Keep the closing duplicate in sync with the moved first point.
                        points[-1] = copy.deepcopy(points[0])
                    else:
                        pi = copy.deepcopy(points[point_ind])
                        points[point_ind] = (
                            pi[0] + 0.000001 * random.random() * [-1, 1][random.randint(0, 1)],
                            pi[1] + 0.000001 * random.random() * [-1, 1][random.randint(0, 1)],
                            pi[2],
                            pi[3],
                            pi[4],
                            pi[5],
                        )
            # Recompute the cross products from the jittered positions.
            cross_products = []
            poses = [(p[0], p[1]) for p in points]
            for ind in range(len(poses) - 1):
                ei = [
                    poses[(ind + 1) % (len(poses) - 1)][0] - poses[ind][0],
                    poses[(ind + 1) % (len(poses) - 1)][1] - poses[ind][1],
                ]
                eiplus1 = [
                    poses[(ind + 2) % (len(poses) - 1)][0] - poses[(ind + 1) % (len(poses) - 1)][0],
                    poses[(ind + 2) % (len(poses) - 1)][1] - poses[(ind + 1) % (len(poses) - 1)][1],
                ]
                cross_products.append(np.cross(ei, eiplus1).tolist())
            cross_products.insert(0, cross_products[-1])
            cross_products.pop(-1)
        # The four semantic labels stored on each point (components 2..5).
        semantics = [[p[2], p[3], p[4], p[5]] for p in points]
        # For each vertex: direction angles of the reversed incoming edge and
        # of the outgoing edge, rotated like cross_products to align with vertex k.
        degrees = []
        for ind in range(len(poses) - 1):
            ei_minus = [
                -(poses[(ind + 1) % (len(poses) - 1)][0] - poses[ind][0]),
                -(poses[(ind + 1) % (len(poses) - 1)][1] - poses[ind][1]),
            ]
            eiplus1 = [
                poses[(ind + 2) % (len(poses) - 1)][0] - poses[(ind + 1) % (len(poses) - 1)][0],
                poses[(ind + 2) % (len(poses) - 1)][1] - poses[(ind + 1) % (len(poses) - 1)][1],
            ]
            degrees.append((x_axis_angle(ei_minus), x_axis_angle(eiplus1)))
        degrees.insert(0, degrees[-1])
        degrees.pop(-1)
        # Two candidate sweeps per vertex: (min, max) and its complement (max, min).
        angles = []
        for degree in degrees:
            angles.append(((min(degree), max(degree)), (max(degree), min(degree))))
        angles_to_semantics = []
        for angle_ind, angle in enumerate(angles):
            angle1 = angle[0]
            angle2 = angle[1]
            quadrant1 = get_quadrant(angle1)
            quadrant2 = get_quadrant(angle2)
            # A quadrant contributes its label only when the sweep covers at
            # least 45 degrees of it; -1 marks "no label".  Quadrants 0..3 read
            # label slots 1, 0, 3, 2 respectively.
            semantic1 = (
                semantics[angle_ind][1] if quadrant1[0] >= 45 else -1,
                semantics[angle_ind][0] if quadrant1[1] >= 45 else -1,
                semantics[angle_ind][3] if quadrant1[2] >= 45 else -1,
                semantics[angle_ind][2] if quadrant1[3] >= 45 else -1,
            )
            semantic2 = (
                semantics[angle_ind][1] if quadrant2[0] >= 45 else -1,
                semantics[angle_ind][0] if quadrant2[1] >= 45 else -1,
                semantics[angle_ind][3] if quadrant2[2] >= 45 else -1,
                semantics[angle_ind][2] if quadrant2[3] >= 45 else -1,
            )
            # Total degrees covered by each candidate sweep.
            angle1_degree = sum(quadrant1)
            angle2_degree = sum(quadrant2)
            # The sign of the cross product selects the smaller sweep (negative)
            # or the larger sweep (positive) at this vertex.
            xproduct = cross_products[angle_ind]
            if xproduct < 0:
                if angle1_degree < angle2_degree:
                    angles_to_semantics.append(semantic1)
                else:
                    angles_to_semantics.append(semantic2)
            elif xproduct > 0:
                if angle1_degree < angle2_degree:
                    angles_to_semantics.append(semantic2)
                else:
                    angles_to_semantics.append(semantic1)
            else:
                # Zero cross products were removed by the jitter loop above.
                assert 0
        # Vote tally over labels 0..12: each vertex spreads one vote evenly
        # over its labels (vertices with no label contribute nothing).
        semantic_result = {}
        for semantic_label in range(0, 13):
            semantic_result[semantic_label] = 0
        for everypoint_semantic in angles_to_semantics:
            everypoint_semantic = [s for s in everypoint_semantic if s != -1]
            for label in everypoint_semantic:
                semantic_result[label] += 1 / len(everypoint_semantic)
        # Highest vote wins; a tie for first place is broken randomly.
        this_cycle_semantic1 = sorted(semantic_result.items(), key=lambda d: d[1], reverse=True)
        this_cycle_result = None
        if this_cycle_semantic1[0][1] > this_cycle_semantic1[1][1]:
            this_cycle_result = this_cycle_semantic1[0][0]
        else:
            this_cycle_results = [i[0] for i in this_cycle_semantic1 if i[1] == this_cycle_semantic1[0][1]]
            this_cycle_result = this_cycle_results[random.randint(0, len(this_cycle_results) - 1)]
        results.append(this_cycle_result)
    return d_rev, simple_cycles, results
+def get_cycle_basis_and_semantic(best_result):
+    """Trace the closed cycles of the predicted graph and vote one semantic label per cycle.
+
+    Points and edges come from ``get_results_float_with_semantic(best_result)``.
+    Bridge edges are discarded, then every remaining connected component is
+    walked edge by edge: at each vertex the walk continues along the incident
+    edge with the largest ``rotate_degree_counterclockwise_from_counter_degree``
+    value, measured from the direction pointing back to the previous vertex.
+    A traced cycle is kept only when ``poly_area`` of its polygon (x and y
+    truncated with ``int``, y negated) is positive.
+
+    For every kept cycle, each visited vertex contributes the labels stored at
+    indices 2..5 of the point tuple that pass the quadrant filter; a vertex
+    spreads a total weight of 1 evenly over its surviving labels. Labels 11 and
+    12 are excluded from the vote and ties are broken with ``random.randint``,
+    so the result is not deterministic when a tie occurs.
+
+    Args:
+        best_result: opaque prediction object, only forwarded to
+            ``get_results_float_with_semantic``.
+
+    Returns:
+        ``(d_rev, simple_cycles, simple_cycles_semantics)``: the
+        index -> point mapping, the kept cycles as lists of points, and the
+        voted label of each kept cycle (same order as ``simple_cycles``).
+    """
+    output_points, output_edges = get_results_float_with_semantic(best_result)
+    # Work on copies: bridge edges are removed from output_edges further down.
+    output_points = copy.deepcopy(output_points)
+    output_edges = copy.deepcopy(output_edges)
+    # d maps point -> index in output_points; d_rev is the inverse mapping.
+    d = {}
+    for output_point_index, output_point in enumerate(output_points):
+        d[output_point] = output_point_index
+    d_rev = {}
+    for output_point_index, output_point in enumerate(output_points):
+        d_rev[output_point_index] = output_point
+    # Same edges as output_edges, expressed as (index, index) pairs.
+    es = []
+    for output_edge in output_edges:
+        es.append((d[output_edge[0]], d[output_edge[1]]))
+    # print(d)
+    G = nx.Graph()
+    for e in es:
+        G.add_edge(e[0], e[1])
+    simple_cycles = []
+    simple_cycles_number = []
+    simple_cycles_semantics = []
+    # Drop bridges from all three representations (output_edges, es, G).
+    # An edge may be stored in either orientation, so both are checked.
+    # NOTE(review): if both orientations of the same bridge were present, the
+    # second G.remove_edge would target an edge that is already gone - confirm
+    # the input never contains an edge twice.
+    bridges = list(nx.bridges(G))
+    for b in bridges:
+        if (d_rev[b[0]], d_rev[b[1]]) in output_edges:
+            output_edges.remove((d_rev[b[0]], d_rev[b[1]]))
+            es.remove((b[0], b[1]))
+            G.remove_edge(b[0], b[1])
+        if (d_rev[b[1]], d_rev[b[0]]) in output_edges:
+            output_edges.remove((d_rev[b[1]], d_rev[b[0]]))
+            es.remove((b[1], b[0]))
+            G.remove_edge(b[1], b[0])
+    connected_components = list(nx.connected_components(G))
+    for c in connected_components:
+        # A single-node component has no edges left, hence no cycle to trace.
+        if len(c) == 1:
+            pass
+        else:
+            simple_cycles_c = []
+            simple_cycles_number_c = []
+            simple_cycle_semantics_c = []
+            # output_points_c = [p for p in output_points if d[p] in c]
+            output_edges_c = [e for e in output_edges if d[e[0]] in c or d[e[1]] in c]
+            # Edges still available as the starting edge of a new walk.
+            output_edges_c_copy_for_traversing = copy.deepcopy(output_edges_c)
+            for edge_c in output_edges_c:
+                if edge_c not in output_edges_c_copy_for_traversing:
+                    pass
+                else:
+                    simple_cycle_semantics = []
+                    simple_cycle = []
+                    simple_cycle_number = []
+                    point1 = edge_c[0]
+                    point2 = edge_c[1]
+                    point1_number = d[point1]
+                    point2_number = d[point2]
+                    # The walk always starts at the lower-numbered endpoint and
+                    # first steps to the higher-numbered one.
+                    initial_point = None
+                    initial_point_number = None
+                    if point1_number < point2_number:
+                        initial_point = point1
+                        initial_point_number = point1_number
+                    else:
+                        initial_point = point2
+                        initial_point_number = point2_number
+                    simple_cycle.append(initial_point)
+                    simple_cycle_number.append(initial_point_number)
+                    last_point = initial_point
+                    current_point = None
+                    current_point_number = None
+                    if point1_number < point2_number:
+                        current_point = point2
+                        current_point_number = point2_number
+                    else:
+                        current_point = point1
+                        current_point_number = point1_number
+                    simple_cycle.append(current_point)
+                    simple_cycle_number.append(current_point_number)
+                    # The walk ends when it is about to step onto this second
+                    # point again.
+                    next_initial_point = copy.deepcopy(current_point)
+                    next_point = None
+                    next_point_number = None
+                    while next_point != next_initial_point:
+                        # All edges of the component incident on current_point.
+                        relevant_edges = []
+                        for edge in output_edges_c:
+                            if edge[0] == current_point or edge[1] == current_point:
+                                relevant_edges.append(edge)
+                        # Angle (via x_axis_angle) of each incident edge, always
+                        # taken as a vector pointing away from current_point.
+                        relevant_edges_degree = []
+                        for relevant_edge in relevant_edges:
+                            vec = None
+                            if relevant_edge[0] == current_point:
+                                vec = (
+                                    relevant_edge[1][0] - relevant_edge[0][0],
+                                    relevant_edge[1][1] - relevant_edge[0][1],
+                                )
+                            elif relevant_edge[1] == current_point:
+                                vec = (
+                                    relevant_edge[0][0] - relevant_edge[1][0],
+                                    relevant_edge[0][1] - relevant_edge[1][1],
+                                )
+                            else:
+                                assert 0
+                            vec_degree = x_axis_angle(vec)
+                            relevant_edges_degree.append(vec_degree)
+                        # Pull the edge we arrived on out of the candidate lists
+                        # and remember its angle as the reference direction.
+                        # NOTE(review): both lists are mutated while
+                        # relevant_edges is being enumerated, and the angle is
+                        # removed by value rather than by index - this looks
+                        # safe only if exactly one incident edge joins
+                        # current_point and last_point; confirm.
+                        vec_from_current_point_to_last_point_degree = None
+                        for relevant_edge_ind, relevant_edge in enumerate(relevant_edges):
+                            if relevant_edge == (current_point, last_point):
+                                vec_from_current_point_to_last_point_degree = relevant_edges_degree[relevant_edge_ind]
+                                relevant_edges.remove(relevant_edge)
+                                relevant_edges_degree.remove(vec_from_current_point_to_last_point_degree)
+                            elif relevant_edge == (last_point, current_point):
+                                vec_from_current_point_to_last_point_degree = relevant_edges_degree[relevant_edge_ind]
+                                relevant_edges.remove(relevant_edge)
+                                relevant_edges_degree.remove(vec_from_current_point_to_last_point_degree)
+                            else:
+                                continue
+                        # Score every remaining candidate against the reference
+                        # direction; interior_angles keeps the matching
+                        # (candidate angle, reference angle) pair.
+                        rotate_deltas_counterclockwise = []
+                        interior_angles = []
+                        for relevant_edge_degree in relevant_edges_degree:
+                            rotate_delta = rotate_degree_counterclockwise_from_counter_degree(
+                                vec_from_current_point_to_last_point_degree, relevant_edge_degree
+                            )
+                            rotate_deltas_counterclockwise.append(rotate_delta)
+                            interior_angles.append((relevant_edge_degree, vec_from_current_point_to_last_point_degree))
+                        # print(rotate_deltas_counterclockwise)
+                        # NOTE(review): max() raises ValueError if no candidate
+                        # edge is left at this vertex.
+                        max_rotate_index = rotate_deltas_counterclockwise.index(max(rotate_deltas_counterclockwise))
+                        interior_angle_counterclockwise = interior_angles[max_rotate_index]
+                        # Four labels read from the point tuple in the order
+                        # 3, 2, 5, 4 - presumably one label per quadrant around
+                        # the point; verify against the point layout.
+                        current_point_semantic = [
+                            current_point[3],
+                            current_point[2],
+                            current_point[5],
+                            current_point[4],
+                        ]
+                        interior_angle_counterclockwise_degree_smaller = min(interior_angle_counterclockwise)
+                        interior_angle_counterclockwise_degree_bigger = max(interior_angle_counterclockwise)
+                        quadrant_smaller_to_bigger_counterclockwise = get_quadrant(
+                            (
+                                interior_angle_counterclockwise_degree_smaller,
+                                interior_angle_counterclockwise_degree_bigger,
+                            )
+                        )
+                        # print(quadrant_smaller_to_bigger_counterclockwise)
+                        # When the reference angle (position 1 of the pair) is
+                        # the smaller one, every quadrant value is replaced by
+                        # 90 minus itself - presumably switching to the
+                        # complementary sector; confirm against get_quadrant.
+                        if interior_angle_counterclockwise.index(interior_angle_counterclockwise_degree_smaller) == 0:
+                            pass
+                        elif (
+                            interior_angle_counterclockwise.index(interior_angle_counterclockwise_degree_smaller) == 1
+                        ):
+                            quadrant_smaller_to_bigger_counterclockwise = (
+                                90 - quadrant_smaller_to_bigger_counterclockwise[0],
+                                90 - quadrant_smaller_to_bigger_counterclockwise[1],
+                                90 - quadrant_smaller_to_bigger_counterclockwise[2],
+                                90 - quadrant_smaller_to_bigger_counterclockwise[3],
+                            )
+                        else:
+                            assert 0
+                        # Keep a label only when its quadrant value is at least
+                        # 45; rejected slots are marked with -1.
+                        current_point_semantic_valid = []
+                        for qd, seman in enumerate(current_point_semantic):
+                            if quadrant_smaller_to_bigger_counterclockwise[qd] >= 45:
+                                current_point_semantic_valid.append(seman)
+                            else:
+                                current_point_semantic_valid.append(-1)
+                        simple_cycle_semantics.append(current_point_semantic_valid)
+                        # Step along the best-scoring edge to its other endpoint.
+                        max_rotate_edge = relevant_edges[max_rotate_index]
+                        if max_rotate_edge[0] == current_point:
+                            next_point = max_rotate_edge[1]
+                            next_point_number = d[next_point]
+                        elif max_rotate_edge[1] == current_point:
+                            next_point = max_rotate_edge[0]
+                            next_point_number = d[next_point]
+                        else:
+                            assert 0
+                        last_point = current_point
+                        current_point = next_point
+                        current_point_number = next_point_number
+                        simple_cycle.append(current_point)
+                        simple_cycle_number.append(current_point_number)
+                    # Mark edges as consumed, but only those this walk crossed
+                    # from the lower-numbered to the higher-numbered endpoint -
+                    # presumably so the opposite traversal, belonging to the
+                    # neighbouring face, can still start a walk later.
+                    for point_number_ind, point_number in enumerate(simple_cycle_number):
+                        if point_number_ind < len(simple_cycle_number) - 1:
+                            edge_number = (point_number, simple_cycle_number[point_number_ind + 1])
+                            # print(simple_cycle_number)
+                            if edge_number[0] < edge_number[1]:
+                                if (
+                                    d_rev[edge_number[0]],
+                                    d_rev[edge_number[1]],
+                                ) in output_edges_c_copy_for_traversing:
+                                    output_edges_c_copy_for_traversing.remove(
+                                        (d_rev[edge_number[0]], d_rev[edge_number[1]])
+                                    )
+                                elif (
+                                    d_rev[edge_number[1]],
+                                    d_rev[edge_number[0]],
+                                ) in output_edges_c_copy_for_traversing:
+                                    output_edges_c_copy_for_traversing.remove(
+                                        (d_rev[edge_number[1]], d_rev[edge_number[0]])
+                                    )
+                    # Drop the trailing point that terminated the loop (the
+                    # second point of the walk, appended a second time).
+                    simple_cycle.pop(-1)
+                    simple_cycle_number.pop(-1)
+                    # Polygon used only for the area test: int-truncated
+                    # coordinates with y negated, minus one more trailing
+                    # vertex (presumably the repeated start point).
+                    polygon_counterclockwise = [(int(p[0]), -int(p[1])) for p in simple_cycle]
+                    polygon_counterclockwise.pop(-1)
+                    # print('poly_area(polygon_counterclockwise)', poly_area(polygon_counterclockwise))
+                    # Only cycles with positive poly_area are kept - presumably
+                    # this rejects the traversal in the opposite orientation.
+                    if poly_area(polygon_counterclockwise) > 0:
+                        simple_cycles_c.append(simple_cycle)
+                        simple_cycles_number_c.append(simple_cycle_number)
+                        # Weighted vote over labels 0..12: each vertex shares a
+                        # weight of 1 equally among its non -1 labels.
+                        semantic_result = {}
+                        for semantic_label in range(0, 13):
+                            semantic_result[semantic_label] = 0
+                        for everypoint_semantic in simple_cycle_semantics:
+                            everypoint_semantic = [s for s in everypoint_semantic if s != -1]
+                            for label in everypoint_semantic:
+                                semantic_result[label] += 1 / len(everypoint_semantic)
+                        # print(semantic_result)
+                        # Labels 11 and 12 never win the vote.
+                        del semantic_result[11]
+                        del semantic_result[12]
+                        this_cycle_semantic = sorted(semantic_result.items(), key=lambda d: d[1], reverse=True)
+                        # print(this_cycle_semantic)
+                        # Unique top score wins; otherwise pick uniformly among
+                        # the labels tied for the top score.
+                        this_cycle_result = None
+                        if this_cycle_semantic[0][1] > this_cycle_semantic[1][1]:
+                            this_cycle_result = this_cycle_semantic[0][0]
+                        else:
+                            this_cycle_results = [
+                                i[0] for i in this_cycle_semantic if i[1] == this_cycle_semantic[0][1]
+                            ]
+                            this_cycle_result = this_cycle_results[random.randint(0, len(this_cycle_results) - 1)]
+                        # print(this_cycle_result)
+                        simple_cycle_semantics_c.append(this_cycle_result)
+            simple_cycles.extend(simple_cycles_c)
+            simple_cycles_number.extend(simple_cycles_number_c)
+            simple_cycles_semantics.extend(simple_cycle_semantics_c)
+    # print([[(int(j[0]), int(j[1])) for j in i] for i in simple_cycles])
+    # print(len(simple_cycles_number))
+    # print(simple_cycles_semantics)
+    return d_rev, simple_cycles, simple_cycles_semantics
+def get_cycle_basis_and_semantic_2(best_result):
+    """Trace the closed cycles of the predicted graph and vote a label per cycle by presence count.
+
+    The cycle tracing is the same procedure as in
+    ``get_cycle_basis_and_semantic``: bridges are removed, each remaining
+    connected component is walked by always taking the incident edge with the
+    largest ``rotate_degree_counterclockwise_from_counter_degree`` value, and a
+    cycle is kept only when ``poly_area`` of its polygon is positive.
+
+    The semantic vote differs in two ways: all four labels of every visited
+    vertex are kept (the quadrant filter is disabled), and a label scores 1 for
+    each vertex whose labels contain it, instead of a fractional weight.
+    Labels 11 and 12 are excluded and ties are broken with ``random.randint``.
+
+    Args:
+        best_result: opaque prediction object, only forwarded to
+            ``get_results_float_with_semantic``.
+
+    Returns:
+        ``(d_rev, simple_cycles, simple_cycles_semantics)``: the
+        index -> point mapping, the kept cycles as lists of points, and the
+        voted label of each kept cycle (same order as ``simple_cycles``).
+    """
+    output_points, output_edges = get_results_float_with_semantic(best_result)
+    # Work on copies: bridge edges are removed from output_edges further down.
+    output_points = copy.deepcopy(output_points)
+    output_edges = copy.deepcopy(output_edges)
+    # print(output_points)
+    # print(output_edges)
+    # assert 0
+    # d maps point -> index in output_points; d_rev is the inverse mapping.
+    d = {}
+    for output_point_index, output_point in enumerate(output_points):
+        d[output_point] = output_point_index
+    d_rev = {}
+    for output_point_index, output_point in enumerate(output_points):
+        d_rev[output_point_index] = output_point
+    # Same edges as output_edges, expressed as (index, index) pairs.
+    es = []
+    for output_edge in output_edges:
+        es.append((d[output_edge[0]], d[output_edge[1]]))
+    # print(d)
+    G = nx.Graph()
+    for e in es:
+        G.add_edge(e[0], e[1])
+    simple_cycles = []
+    simple_cycles_number = []
+    simple_cycles_semantics = []
+    # Drop bridges from output_edges, es and G; both stored orientations of an
+    # edge are checked.
+    # NOTE(review): if both orientations of the same bridge were present, the
+    # second G.remove_edge would target an edge that is already gone - confirm.
+    bridges = list(nx.bridges(G))
+    for b in bridges:
+        if (d_rev[b[0]], d_rev[b[1]]) in output_edges:
+            output_edges.remove((d_rev[b[0]], d_rev[b[1]]))
+            es.remove((b[0], b[1]))
+            G.remove_edge(b[0], b[1])
+        if (d_rev[b[1]], d_rev[b[0]]) in output_edges:
+            output_edges.remove((d_rev[b[1]], d_rev[b[0]]))
+            es.remove((b[1], b[0]))
+            G.remove_edge(b[1], b[0])
+    connected_components = list(nx.connected_components(G))
+    for c in connected_components:
+        # A single-node component has no edges left, hence no cycle to trace.
+        if len(c) == 1:
+            pass
+        else:
+            simple_cycles_c = []
+            simple_cycles_number_c = []
+            simple_cycle_semantics_c = []
+            output_edges_c = [e for e in output_edges if d[e[0]] in c or d[e[1]] in c]
+            # Edges still available as the starting edge of a new walk.
+            output_edges_c_copy_for_traversing = copy.deepcopy(output_edges_c)
+            for edge_c in output_edges_c:
+                if edge_c not in output_edges_c_copy_for_traversing:
+                    pass
+                else:
+                    simple_cycle_semantics = []
+                    simple_cycle = []
+                    simple_cycle_number = []
+                    point1 = edge_c[0]
+                    point2 = edge_c[1]
+                    point1_number = d[point1]
+                    point2_number = d[point2]
+                    # The walk always starts at the lower-numbered endpoint and
+                    # first steps to the higher-numbered one.
+                    initial_point = None
+                    initial_point_number = None
+                    if point1_number < point2_number:
+                        initial_point = point1
+                        initial_point_number = point1_number
+                    else:
+                        initial_point = point2
+                        initial_point_number = point2_number
+                    simple_cycle.append(initial_point)
+                    simple_cycle_number.append(initial_point_number)
+                    last_point = initial_point
+                    current_point = None
+                    current_point_number = None
+                    if point1_number < point2_number:
+                        current_point = point2
+                        current_point_number = point2_number
+                    else:
+                        current_point = point1
+                        current_point_number = point1_number
+                    simple_cycle.append(current_point)
+                    simple_cycle_number.append(current_point_number)
+                    # The walk ends when it is about to step onto this second
+                    # point again.
+                    next_initial_point = copy.deepcopy(current_point)
+                    next_point = None
+                    next_point_number = None
+                    while next_point != next_initial_point:
+                        # All edges of the component incident on current_point.
+                        relevant_edges = []
+                        for edge in output_edges_c:
+                            if edge[0] == current_point or edge[1] == current_point:
+                                relevant_edges.append(edge)
+                        # Angle (via x_axis_angle) of each incident edge, always
+                        # taken as a vector pointing away from current_point.
+                        relevant_edges_degree = []
+                        for relevant_edge in relevant_edges:
+                            vec = None
+                            if relevant_edge[0] == current_point:
+                                vec = (
+                                    relevant_edge[1][0] - relevant_edge[0][0],
+                                    relevant_edge[1][1] - relevant_edge[0][1],
+                                )
+                            elif relevant_edge[1] == current_point:
+                                vec = (
+                                    relevant_edge[0][0] - relevant_edge[1][0],
+                                    relevant_edge[0][1] - relevant_edge[1][1],
+                                )
+                            else:
+                                assert 0
+                            vec_degree = x_axis_angle(vec)
+                            relevant_edges_degree.append(vec_degree)
+                        # Pull the edge we arrived on out of the candidate lists
+                        # and remember its angle as the reference direction.
+                        # NOTE(review): both lists are mutated while
+                        # relevant_edges is being enumerated, and the angle is
+                        # removed by value rather than by index - confirm that
+                        # exactly one incident edge joins current_point and
+                        # last_point.
+                        vec_from_current_point_to_last_point_degree = None
+                        for relevant_edge_ind, relevant_edge in enumerate(relevant_edges):
+                            if relevant_edge == (current_point, last_point):
+                                vec_from_current_point_to_last_point_degree = relevant_edges_degree[relevant_edge_ind]
+                                relevant_edges.remove(relevant_edge)
+                                relevant_edges_degree.remove(vec_from_current_point_to_last_point_degree)
+                            elif relevant_edge == (last_point, current_point):
+                                vec_from_current_point_to_last_point_degree = relevant_edges_degree[relevant_edge_ind]
+                                relevant_edges.remove(relevant_edge)
+                                relevant_edges_degree.remove(vec_from_current_point_to_last_point_degree)
+                            else:
+                                continue
+                        # Score every remaining candidate against the reference
+                        # direction; interior_angles keeps the matching
+                        # (candidate angle, reference angle) pair.
+                        rotate_deltas_counterclockwise = []
+                        interior_angles = []
+                        for relevant_edge_degree in relevant_edges_degree:
+                            rotate_delta = rotate_degree_counterclockwise_from_counter_degree(
+                                vec_from_current_point_to_last_point_degree, relevant_edge_degree
+                            )
+                            rotate_deltas_counterclockwise.append(rotate_delta)
+                            interior_angles.append((relevant_edge_degree, vec_from_current_point_to_last_point_degree))
+                        # print(rotate_deltas_counterclockwise)
+                        # NOTE(review): max() raises ValueError if no candidate
+                        # edge is left at this vertex.
+                        max_rotate_index = rotate_deltas_counterclockwise.index(max(rotate_deltas_counterclockwise))
+                        interior_angle_counterclockwise = interior_angles[max_rotate_index]
+                        # Four labels read from the point tuple in the order
+                        # 3, 2, 5, 4 - presumably one label per quadrant around
+                        # the point; verify against the point layout.
+                        current_point_semantic = [
+                            current_point[3],
+                            current_point[2],
+                            current_point[5],
+                            current_point[4],
+                        ]
+                        # The quadrant values below are still computed, but the
+                        # label filter further down never reads them in this
+                        # variant.
+                        interior_angle_counterclockwise_degree_smaller = min(interior_angle_counterclockwise)
+                        interior_angle_counterclockwise_degree_bigger = max(interior_angle_counterclockwise)
+                        quadrant_smaller_to_bigger_counterclockwise = get_quadrant(
+                            (
+                                interior_angle_counterclockwise_degree_smaller,
+                                interior_angle_counterclockwise_degree_bigger,
+                            )
+                        )
+                        if interior_angle_counterclockwise.index(interior_angle_counterclockwise_degree_smaller) == 0:
+                            pass
+                        elif (
+                            interior_angle_counterclockwise.index(interior_angle_counterclockwise_degree_smaller) == 1
+                        ):
+                            quadrant_smaller_to_bigger_counterclockwise = (
+                                90 - quadrant_smaller_to_bigger_counterclockwise[0],
+                                90 - quadrant_smaller_to_bigger_counterclockwise[1],
+                                90 - quadrant_smaller_to_bigger_counterclockwise[2],
+                                90 - quadrant_smaller_to_bigger_counterclockwise[3],
+                            )
+                        else:
+                            assert 0
+                        # The condition is the constant 1, so all four labels
+                        # are kept and the -1 branch is unreachable.
+                        current_point_semantic_valid = []
+                        for qd, seman in enumerate(current_point_semantic):
+                            if 1:
+                                current_point_semantic_valid.append(seman)
+                            else:
+                                current_point_semantic_valid.append(-1)
+                        simple_cycle_semantics.append(current_point_semantic_valid)
+                        # Step along the best-scoring edge to its other endpoint.
+                        max_rotate_edge = relevant_edges[max_rotate_index]
+                        if max_rotate_edge[0] == current_point:
+                            next_point = max_rotate_edge[1]
+                            next_point_number = d[next_point]
+                        elif max_rotate_edge[1] == current_point:
+                            next_point = max_rotate_edge[0]
+                            next_point_number = d[next_point]
+                        else:
+                            assert 0
+                        last_point = current_point
+                        current_point = next_point
+                        current_point_number = next_point_number
+                        simple_cycle.append(current_point)
+                        simple_cycle_number.append(current_point_number)
+                    # Mark edges as consumed, but only those this walk crossed
+                    # from the lower-numbered to the higher-numbered endpoint -
+                    # presumably so the opposite traversal, belonging to the
+                    # neighbouring face, can still start a walk later.
+                    for point_number_ind, point_number in enumerate(simple_cycle_number):
+                        if point_number_ind < len(simple_cycle_number) - 1:
+                            edge_number = (point_number, simple_cycle_number[point_number_ind + 1])
+                            if edge_number[0] < edge_number[1]:
+                                if (
+                                    d_rev[edge_number[0]],
+                                    d_rev[edge_number[1]],
+                                ) in output_edges_c_copy_for_traversing:
+                                    output_edges_c_copy_for_traversing.remove(
+                                        (d_rev[edge_number[0]], d_rev[edge_number[1]])
+                                    )
+                                elif (
+                                    d_rev[edge_number[1]],
+                                    d_rev[edge_number[0]],
+                                ) in output_edges_c_copy_for_traversing:
+                                    output_edges_c_copy_for_traversing.remove(
+                                        (d_rev[edge_number[1]], d_rev[edge_number[0]])
+                                    )
+                    # Drop the trailing point that terminated the loop (the
+                    # second point of the walk, appended a second time).
+                    simple_cycle.pop(-1)
+                    simple_cycle_number.pop(-1)
+                    # Polygon used only for the area test: int-truncated
+                    # coordinates with y negated, minus one more trailing
+                    # vertex (presumably the repeated start point).
+                    polygon_counterclockwise = [(int(p[0]), -int(p[1])) for p in simple_cycle]
+                    polygon_counterclockwise.pop(-1)
+                    # Only cycles with positive poly_area are kept - presumably
+                    # this rejects the traversal in the opposite orientation.
+                    if poly_area(polygon_counterclockwise) > 0:
+                        simple_cycles_c.append(simple_cycle)
+                        simple_cycles_number_c.append(simple_cycle_number)
+                        # Presence count over labels 0..12: a label scores 1
+                        # for every vertex whose four labels contain it, no
+                        # matter how many of the four slots it fills.
+                        semantic_result = {}
+                        for semantic_label in range(0, 13):
+                            semantic_result[semantic_label] = 0
+                        for everypoint_semantic in simple_cycle_semantics:
+                            for _ in range(0, 13):
+                                if _ in everypoint_semantic:
+                                    semantic_result[_] += 1
+                        # Labels 11 and 12 never win the vote.
+                        del semantic_result[11]
+                        del semantic_result[12]
+                        this_cycle_semantic = sorted(semantic_result.items(), key=lambda d: d[1], reverse=True)
+                        # Unique top score wins; otherwise pick uniformly among
+                        # the labels tied for the top score.
+                        this_cycle_result = None
+                        if this_cycle_semantic[0][1] > this_cycle_semantic[1][1]:
+                            this_cycle_result = this_cycle_semantic[0][0]
+                        else:
+                            this_cycle_results = [
+                                i[0] for i in this_cycle_semantic if i[1] == this_cycle_semantic[0][1]
+                            ]
+                            this_cycle_result = this_cycle_results[random.randint(0, len(this_cycle_results) - 1)]
+                        simple_cycle_semantics_c.append(this_cycle_result)
+            simple_cycles.extend(simple_cycles_c)
+            simple_cycles_number.extend(simple_cycles_number_c)
+            simple_cycles_semantics.extend(simple_cycle_semantics_c)
+    return d_rev, simple_cycles, simple_cycles_semantics
+def get_cycle_basis(best_result):
+    """Trace the closed cycles of the predicted graph (geometry only, no semantic vote).
+
+    Points and edges come from ``get_results(best_result)``. Bridge edges are
+    discarded, then every remaining connected component is walked edge by edge:
+    at each vertex the walk continues along the incident edge with the largest
+    ``rotate_degree_counterclockwise_from_counter_degree`` value, measured from
+    the direction pointing back to the previous vertex. A traced cycle is kept
+    only when ``poly_area`` of its polygon (x and y truncated with ``int``,
+    y negated) is positive.
+
+    Args:
+        best_result: opaque prediction object, only forwarded to ``get_results``.
+
+    Returns:
+        ``(d_rev, simple_cycles, simple_cycles_number)``: the index -> point
+        mapping, the kept cycles as lists of points, and the same cycles as
+        lists of point indices.
+    """
+    output_points, output_edges = get_results(best_result)
+    # Work on copies: bridge edges are removed from output_edges further down.
+    output_points = copy.deepcopy(output_points)
+    output_edges = copy.deepcopy(output_edges)
+    # d maps point -> index in output_points; d_rev is the inverse mapping.
+    d = {}
+    for output_point_index, output_point in enumerate(output_points):
+        d[output_point] = output_point_index
+    d_rev = {}
+    for output_point_index, output_point in enumerate(output_points):
+        d_rev[output_point_index] = output_point
+    # Same edges as output_edges, expressed as (index, index) pairs.
+    es = []
+    for output_edge in output_edges:
+        es.append((d[output_edge[0]], d[output_edge[1]]))
+    G = nx.Graph()
+    for e in es:
+        G.add_edge(e[0], e[1])
+    simple_cycles = []
+    simple_cycles_number = []
+    # Drop bridges from output_edges, es and G; both stored orientations of an
+    # edge are checked.
+    # NOTE(review): if both orientations of the same bridge were present, the
+    # second G.remove_edge would target an edge that is already gone - confirm.
+    bridges = list(nx.bridges(G))
+    for b in bridges:
+        if (d_rev[b[0]], d_rev[b[1]]) in output_edges:
+            output_edges.remove((d_rev[b[0]], d_rev[b[1]]))
+            es.remove((b[0], b[1]))
+            G.remove_edge(b[0], b[1])
+        if (d_rev[b[1]], d_rev[b[0]]) in output_edges:
+            output_edges.remove((d_rev[b[1]], d_rev[b[0]]))
+            es.remove((b[1], b[0]))
+            G.remove_edge(b[1], b[0])
+    connected_components = list(nx.connected_components(G))
+    for c in connected_components:
+        # A single-node component has no edges left, hence no cycle to trace.
+        if len(c) == 1:
+            pass
+        else:
+            simple_cycles_c = []
+            simple_cycles_number_c = []
+            output_edges_c = [e for e in output_edges if d[e[0]] in c or d[e[1]] in c]
+            # Edges still available as the starting edge of a new walk.
+            output_edges_c_copy_for_traversing = copy.deepcopy(output_edges_c)
+            for edge_c in output_edges_c:
+                if edge_c not in output_edges_c_copy_for_traversing:
+                    pass
+                else:
+                    simple_cycle = []
+                    simple_cycle_number = []
+                    point1 = edge_c[0]
+                    point2 = edge_c[1]
+                    point1_number = d[point1]
+                    point2_number = d[point2]
+                    # The walk always starts at the lower-numbered endpoint and
+                    # first steps to the higher-numbered one.
+                    if point1_number < point2_number:
+                        initial_point = point1
+                        initial_point_number = point1_number
+                        current_point = point2
+                        current_point_number = point2_number
+                    else:
+                        initial_point = point2
+                        initial_point_number = point2_number
+                        current_point = point1
+                        current_point_number = point1_number
+                    simple_cycle.append(initial_point)
+                    simple_cycle_number.append(initial_point_number)
+                    simple_cycle.append(current_point)
+                    simple_cycle_number.append(current_point_number)
+                    last_point = initial_point
+                    # The walk ends when it is about to step onto this second
+                    # point again.
+                    next_initial_point = copy.deepcopy(current_point)
+                    next_point = None
+                    while next_point != next_initial_point:
+                        # All edges of the component incident on current_point.
+                        relevant_edges = []
+                        for edge in output_edges_c:
+                            if edge[0] == current_point or edge[1] == current_point:
+                                relevant_edges.append(edge)
+                        # Angle (via x_axis_angle) of each incident edge, always
+                        # taken as a vector pointing away from current_point.
+                        relevant_edges_degree = []
+                        for relevant_edge in relevant_edges:
+                            vec = None
+                            if relevant_edge[0] == current_point:
+                                vec = (
+                                    relevant_edge[1][0] - relevant_edge[0][0],
+                                    relevant_edge[1][1] - relevant_edge[0][1],
+                                )
+                            elif relevant_edge[1] == current_point:
+                                vec = (
+                                    relevant_edge[0][0] - relevant_edge[1][0],
+                                    relevant_edge[0][1] - relevant_edge[1][1],
+                                )
+                            else:
+                                assert 0
+                            vec_degree = x_axis_angle(vec)
+                            relevant_edges_degree.append(vec_degree)
+                        # Pull the edge we arrived on out of the candidate lists
+                        # and remember its angle as the reference direction.
+                        # NOTE(review): both lists are mutated while
+                        # relevant_edges is being enumerated, and the angle is
+                        # removed by value rather than by index - confirm that
+                        # exactly one incident edge joins current_point and
+                        # last_point.
+                        vec_from_current_point_to_last_point_degree = None
+                        for relevant_edge_ind, relevant_edge in enumerate(relevant_edges):
+                            if relevant_edge == (current_point, last_point):
+                                vec_from_current_point_to_last_point_degree = relevant_edges_degree[relevant_edge_ind]
+                                relevant_edges.remove(relevant_edge)
+                                relevant_edges_degree.remove(vec_from_current_point_to_last_point_degree)
+                            elif relevant_edge == (last_point, current_point):
+                                vec_from_current_point_to_last_point_degree = relevant_edges_degree[relevant_edge_ind]
+                                relevant_edges.remove(relevant_edge)
+                                relevant_edges_degree.remove(vec_from_current_point_to_last_point_degree)
+                            else:
+                                continue
+                        # Score every remaining candidate against the reference
+                        # direction and follow the highest-scoring one.
+                        rotate_deltas_counterclockwise = []
+                        for relevant_edge_degree in relevant_edges_degree:
+                            rotate_delta = rotate_degree_counterclockwise_from_counter_degree(
+                                vec_from_current_point_to_last_point_degree, relevant_edge_degree
+                            )
+                            rotate_deltas_counterclockwise.append(rotate_delta)
+                        # NOTE(review): max() raises ValueError if no candidate
+                        # edge is left at this vertex.
+                        max_rotate_index = rotate_deltas_counterclockwise.index(max(rotate_deltas_counterclockwise))
+                        max_rotate_edge = relevant_edges[max_rotate_index]
+                        if max_rotate_edge[0] == current_point:
+                            next_point = max_rotate_edge[1]
+                            next_point_number = d[next_point]
+                        elif max_rotate_edge[1] == current_point:
+                            next_point = max_rotate_edge[0]
+                            next_point_number = d[next_point]
+                        else:
+                            assert 0
+                        last_point = current_point
+                        current_point = next_point
+                        current_point_number = next_point_number
+                        simple_cycle.append(current_point)
+                        simple_cycle_number.append(current_point_number)
+                    # Mark edges as consumed, but only those this walk crossed
+                    # from the lower-numbered to the higher-numbered endpoint -
+                    # presumably so the opposite traversal, belonging to the
+                    # neighbouring face, can still start a walk later.
+                    for point_number_ind, point_number in enumerate(simple_cycle_number):
+                        if point_number_ind < len(simple_cycle_number) - 1:
+                            edge_number = (point_number, simple_cycle_number[point_number_ind + 1])
+                            if edge_number[0] < edge_number[1]:
+                                if (
+                                    d_rev[edge_number[0]],
+                                    d_rev[edge_number[1]],
+                                ) in output_edges_c_copy_for_traversing:
+                                    output_edges_c_copy_for_traversing.remove(
+                                        (d_rev[edge_number[0]], d_rev[edge_number[1]])
+                                    )
+                                elif (
+                                    d_rev[edge_number[1]],
+                                    d_rev[edge_number[0]],
+                                ) in output_edges_c_copy_for_traversing:
+                                    output_edges_c_copy_for_traversing.remove(
+                                        (d_rev[edge_number[1]], d_rev[edge_number[0]])
+                                    )
+                    # Drop the trailing point that terminated the loop (the
+                    # second point of the walk, appended a second time).
+                    simple_cycle.pop(-1)
+                    simple_cycle_number.pop(-1)
+                    # Polygon used only for the area test: int-truncated
+                    # coordinates with y negated, minus one more trailing
+                    # vertex (presumably the repeated start point).
+                    polygon_counterclockwise = [(int(p[0]), -int(p[1])) for p in simple_cycle]
+                    polygon_counterclockwise.pop(-1)
+                    # Only cycles with positive poly_area are kept - presumably
+                    # this rejects the traversal in the opposite orientation.
+                    if poly_area(polygon_counterclockwise) > 0:
+                        simple_cycles_c.append(simple_cycle)
+                        simple_cycles_number_c.append(simple_cycle_number)
+            simple_cycles.extend(simple_cycles_c)
+            simple_cycles_number.extend(simple_cycles_number_c)
+    return d_rev, simple_cycles, simple_cycles_number

data_preprocess/raster2graph/util/image_id_dict.py ADDED Viewed

The diff for this file is too large to render. See raw diff

data_preprocess/raster2graph/util/math_utils.py ADDED Viewed

	@@ -0,0 +1,7 @@

+def clip(number, _min, _max):
+    if number <= _min:
+        return _min
+    elif number >= _max:
+        return _max
+    else:
+        return number

data_preprocess/raster2graph/util/mean_std.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ mean = [0.920, 0.913, 0.891]  # NOTE(review): presumably per-channel image means on a 0-1 scale; channel order is not shown here - confirm
2	+ std = [0.214, 0.216, 0.228]  # NOTE(review): presumably the matching per-channel standard deviations - confirm against the data loader

data_preprocess/raster2graph/util/metric_utils.py ADDED Viewed

	@@ -0,0 +1,338 @@

+import copy
+import math
+from shapely.geometry import Polygon
+from util.geom_utils import poly_iou
+def calculate_AP(valid_results, ground_truths, confidence_final):
+    ground_truths_copy = copy.deepcopy(ground_truths)
+    all_preds = []
+    for image_id, image_pred in valid_results.items():
+        for i in range(len(image_pred["points"])):
+            pred = {}
+            pred["score"] = image_pred["scores"][i].item()
+            pred["point"] = tuple(image_pred["points"][i].tolist())
+            pred["size"] = tuple(image_pred["size"].tolist())
+            pred["image_id"] = image_id.item()
+            all_preds.append(pred)
+    all_preds = sorted(all_preds, key=lambda x: x["score"], reverse=True)
+    all_preds = [pred for pred in all_preds if pred["score"] > confidence_final]
+    all_metrics = []
+    for n in range(1, len(all_preds) + 1):
+        ground_truths = copy.deepcopy(ground_truths_copy)
+        sub_preds = all_preds[0:n]
+        TP = 0
+        FP = 0
+        FN = 0
+        for pred in sub_preds:
+            pred_point = pred["point"]
+            img_size = (pred["size"][1], pred["size"][0])
+            img_id = pred["image_id"]
+            dist_threshold = (img_size[0] * 0.01, img_size[1] * 0.01)
+            gt = [tuple(gt_point) for gt_point in ground_truths[img_id]["points"].tolist()]
+            gt_copy = copy.deepcopy(gt)
+            euc_dists = {}
+            dists = {}
+            for gt_point in gt_copy:
+                if gt_point[2] == 0:
+                    dist = (abs(pred_point[0] - gt_point[0]), abs(pred_point[1] - gt_point[1]))
+                    euc_dist = math.sqrt(dist[0] ** 2 + dist[1] ** 2)
+                    euc_dists[gt_point] = euc_dist
+                    dists[gt_point] = dist
+            euc_dists = sorted(euc_dists.items(), key=lambda x: x[1])
+            if len(euc_dists) == 0:
+                FP += 1
+                continue
+            nearest_gt_point = euc_dists[0][0]
+            min_dist = dists[nearest_gt_point]
+            if min_dist[0] < dist_threshold[0] and min_dist[1] < dist_threshold[1]:
+                gtip = ground_truths[img_id]["points"]
+                for i, p in enumerate(gtip):
+                    if (
+                        p[0].item() == nearest_gt_point[0]
+                        and p[1].item() == nearest_gt_point[1]
+                        and p[2].item() == nearest_gt_point[2]
+                    ):
+                        # print('qqq', p, nearest_gt_point)
+                        gtip[i, 2] = 1
+                        break
+                ground_truths[img_id]["points"] = gtip
+                # print('rrr', ground_truths[img_id]['points'])
+                TP += 1
+                continue
+            FP += 1
+        for img_id, points in ground_truths.items():
+            points = points["points"]
+            for point in points:
+                if point[2] == 0:
+                    FN += 1
+        precision = TP / (TP + FP)
+        recall = TP / (TP + FN)
+        # print(n, TP, FP, FN, precision, recall)
+        all_metrics.append((precision, recall))
+    all_metrics = sorted(all_metrics, key=lambda x: (x[1], x[0]))
+    p_r_curve_points = {}
+    for point in all_metrics:
+        p_r_curve_points[point[1]] = point[0]
+    p_r_curve_points[0] = 1
+    p_r_curve_points = sorted(p_r_curve_points.items(), key=lambda d: d[0])
+    AP = 0
+    for i, rp in enumerate(p_r_curve_points):
+        r = rp[0]
+        p = rp[1]
+        if i > 0:
+            small_rectangular_area = (r - p_r_curve_points[i - 1][0]) * p
+            AP += small_rectangular_area
+    return AP
+def get_results(best_result):
+    if 1:
+        preds = best_result[2]
+        output_points = []
+        output_edges = []
+        for triplet in preds:
+            this_preds = triplet[0]
+            last_edges = triplet[1]
+            this_edges = triplet[2]
+            for this_pred in this_preds:
+                point = tuple(this_pred["points"].int().tolist())
+                output_points.append(point)
+            for last_edge in last_edges:
+                point1 = tuple(last_edge[0]["points"].int().tolist())
+                point2 = tuple(last_edge[1]["points"].int().tolist())
+                edge = (point1, point2)
+                output_edges.append(edge)
+            for this_edge in this_edges:
+                point1 = tuple(this_edge[0]["points"].int().tolist())
+                point2 = tuple(this_edge[1]["points"].int().tolist())
+                edge = (point1, point2)
+                output_edges.append(edge)
+        return output_points, output_edges
+def get_results_visual(best_result):
+    if 1:
+        preds = best_result[2]
+        output_points = []
+        output_edges = []
+        for layer_index, triplet in enumerate(preds):
+            this_preds = triplet[0]
+            last_edges = triplet[1]
+            this_edges = triplet[2]
+            for this_pred in this_preds:
+                point = tuple(this_pred["points"].int().tolist())
+                output_points.append([layer_index, point])
+            for last_edge in last_edges:
+                point1 = tuple(last_edge[0]["points"].int().tolist())
+                point2 = tuple(last_edge[1]["points"].int().tolist())
+                edge = (point1, point2)
+                output_edges.append([layer_index, edge])
+            for this_edge in this_edges:
+                point1 = tuple(this_edge[0]["points"].int().tolist())
+                point2 = tuple(this_edge[1]["points"].int().tolist())
+                edge = (point1, point2)
+                output_edges.append([layer_index, edge])
+        return output_points, output_edges, len(preds)
+def get_results_float_with_semantic(best_result):
+    preds = best_result[2]
+    output_points = []
+    output_edges = []
+    for triplet in preds:
+        this_preds = triplet[0]
+        last_edges = triplet[1]
+        this_edges = triplet[2]
+        for this_pred in this_preds:
+            point = (
+                this_pred["points"].tolist()[0],
+                this_pred["points"].tolist()[1],
+                this_pred["semantic_left_up"].item(),
+                this_pred["semantic_right_up"].item(),
+                this_pred["semantic_right_down"].item(),
+                this_pred["semantic_left_down"].item(),
+            )
+            output_points.append(point)
+        for last_edge in last_edges:
+            point1 = (
+                last_edge[0]["points"].tolist()[0],
+                last_edge[0]["points"].tolist()[1],
+                last_edge[0]["semantic_left_up"].item(),
+                last_edge[0]["semantic_right_up"].item(),
+                last_edge[0]["semantic_right_down"].item(),
+                last_edge[0]["semantic_left_down"].item(),
+            )
+            point2 = (
+                last_edge[1]["points"].tolist()[0],
+                last_edge[1]["points"].tolist()[1],
+                last_edge[1]["semantic_left_up"].item(),
+                last_edge[1]["semantic_right_up"].item(),
+                last_edge[1]["semantic_right_down"].item(),
+                last_edge[1]["semantic_left_down"].item(),
+            )
+            edge = (point1, point2)
+            output_edges.append(edge)
+        for this_edge in this_edges:
+            point1 = (
+                this_edge[0]["points"].tolist()[0],
+                this_edge[0]["points"].tolist()[1],
+                this_edge[0]["semantic_left_up"].item(),
+                this_edge[0]["semantic_right_up"].item(),
+                this_edge[0]["semantic_right_down"].item(),
+                this_edge[0]["semantic_left_down"].item(),
+            )
+            point2 = (
+                this_edge[1]["points"].tolist()[0],
+                this_edge[1]["points"].tolist()[1],
+                this_edge[1]["semantic_left_up"].item(),
+                this_edge[1]["semantic_right_up"].item(),
+                this_edge[1]["semantic_right_down"].item(),
+                this_edge[1]["semantic_left_down"].item(),
+            )
+            edge = (point1, point2)
+            output_edges.append(edge)
+    return output_points, output_edges
+def calculate_single_sample(
+    best_result, graph, target_d_rev, target_simple_cycles, target_results, d_rev, simple_cycles, results
+):
+    """Count point, edge, region and room matches for one sample.
+
+    Args:
+        best_result: Prediction container passed to ``get_results``.
+        graph: Ground-truth adjacency mapping ``point -> iterable of neighbours``;
+            a neighbour equal to ``(-1, -1)`` is skipped (presumably a
+            "no neighbour" placeholder - TODO confirm).
+        target_d_rev: Not used in this function.
+        target_simple_cycles: Ground-truth region polygons (sequences of points).
+        target_results: Ground-truth labels, indexed like ``target_simple_cycles``.
+        d_rev: Not used in this function.
+        simple_cycles: Predicted region polygons; the last vertex of each is dropped.
+        results: Predicted labels, indexed like ``simple_cycles``.
+
+    Returns:
+        A 13-tuple ``(points_TP, points_FP, points_FN, edges_TP, edges_FP,
+        edges_FN, dist_error, regions_TP, regions_FP, regions_FN, rooms_TP,
+        rooms_FP, rooms_FN)``.  ``dist_error`` holds the summed (not averaged)
+        x, y and L2 errors over matched points, or ``(0, 0, 0)`` when no
+        point matched.
+    """
+    output_points, output_edges = get_results(best_result)
+    gt_points = [k for k, v in graph.items()]
+    gt_edges = []
+    # Collect each undirected ground-truth edge once.
+    for k, v in graph.items():
+        for adj in v:
+            if adj != (-1, -1):
+                gt_edge = (k, adj)
+                if (adj, k) not in gt_edges:
+                    gt_edges.append(gt_edge)
+    points_TP = 0
+    points_FP = 0
+    points_FN = 0
+    dist_error_x = 0
+    dist_error_y = 0
+    dist_error_l2 = 0
+    # The copy tracks which ground-truth points are still unclaimed.
+    gt_points_copy = copy.deepcopy(gt_points)
+    threshold = 5
+    # Greedy point matching: a prediction claims the first unclaimed ground-truth
+    # point that lies within `threshold` on both axes.
+    for output_point in output_points:
+        matched = False
+        for gt_point in gt_points:
+            if (abs(output_point[0] - gt_point[0]) <= threshold) and (abs(output_point[1] - gt_point[1]) <= threshold):
+                if gt_point in gt_points_copy:
+                    points_TP += 1
+                    dist_error_x += abs(output_point[0] - gt_point[0])
+                    dist_error_y += abs(output_point[1] - gt_point[1])
+                    dist_error_l2 += (
+                        abs(output_point[0] - gt_point[0]) ** 2 + abs(output_point[1] - gt_point[1]) ** 2
+                    ) ** 0.5
+                    matched = True
+                    gt_points_copy.remove(gt_point)
+                    break
+        if not matched:
+            points_FP += 1
+    points_FN = len(gt_points) - points_TP
+    edges_TP = 0
+    edges_FP = 0
+    edges_FN = 0
+    gt_edges_copy = copy.deepcopy(gt_edges)
+    threshold = 5
+    # Greedy edge matching: both endpoints must be within `threshold` on both
+    # axes, in either endpoint order.
+    for output_edge in output_edges:
+        matched = False
+        for gt_edge in gt_edges:
+            if (
+                (
+                    (abs(output_edge[0][0] - gt_edge[0][0]) <= threshold)
+                    and (abs(output_edge[0][1] - gt_edge[0][1]) <= threshold)
+                )
+                and (
+                    (abs(output_edge[1][0] - gt_edge[1][0]) <= threshold)
+                    and (abs(output_edge[1][1] - gt_edge[1][1]) <= threshold)
+                )
+            ) or (
+                (
+                    (abs(output_edge[0][0] - gt_edge[1][0]) <= threshold)
+                    and (abs(output_edge[0][1] - gt_edge[1][1]) <= threshold)
+                )
+                and (
+                    (abs(output_edge[1][0] - gt_edge[0][0]) <= threshold)
+                    and (abs(output_edge[1][1] - gt_edge[0][1]) <= threshold)
+                )
+            ):
+                if gt_edge in gt_edges_copy:
+                    edges_TP += 1
+                    matched = True
+                    gt_edges_copy.remove(gt_edge)
+                    break
+        if not matched:
+            edges_FP += 1
+    edges_FN = len(gt_edges) - edges_TP
+    regions_TP = 0
+    regions_FP = 0
+    regions_FN = 0
+    rooms_TP = 0
+    rooms_FP = 0
+    rooms_FN = 0
+    gt_regions = []
+    output_regions = []
+    for target_simple_cycle in target_simple_cycles:
+        target_polyg = [(point_i[0], point_i[1]) for point_i in target_simple_cycle]
+        gt_regions.append(target_polyg)
+    for simple_cycle in simple_cycles:
+        polyg = [(point_i[0], point_i[1]) for point_i in simple_cycle]
+        # Only predicted cycles lose their last vertex; ground-truth cycles are
+        # used as given (presumably the predicted ones repeat their start
+        # point - TODO confirm).
+        polyg.pop(-1)
+        output_regions.append(polyg)
+    gt_regions_copy = copy.deepcopy(gt_regions)
+    iou_threshold = 0.7
+    # Greedy region matching on polygon IoU.  A matched region is a room TP only
+    # if its label equals the ground-truth label, otherwise a room FP.
+    for output_region_i, output_region in enumerate(output_regions):
+        matched = False
+        for gt_region_i, gt_region in enumerate(gt_regions):
+            if poly_iou(Polygon(gt_region), Polygon(output_region)) >= iou_threshold:
+                if gt_region in gt_regions_copy:
+                    regions_TP += 1
+                    if target_results[gt_region_i] == results[output_region_i]:
+                        rooms_TP += 1
+                    else:
+                        rooms_FP += 1
+                    matched = True
+                    gt_regions_copy.remove(gt_region)
+                    break
+        if not matched:
+            regions_FP += 1
+            rooms_FP += 1
+    regions_FN = len(gt_regions) - regions_TP
+    rooms_FN = len(gt_regions) - rooms_TP
+    # print(regions_TP, regions_FP, regions_FN)
+    # print(rooms_TP, rooms_FP, rooms_FN)
+    dist_error = (0, 0, 0)
+    if points_TP > 0:
+        dist_error = (dist_error_x, dist_error_y, dist_error_l2)
+    return (
+        points_TP,
+        points_FP,
+        points_FN,
+        edges_TP,
+        edges_FP,
+        edges_FN,
+        dist_error,
+        regions_TP,
+        regions_FP,
+        regions_FN,
+        rooms_TP,
+        rooms_FP,
+        rooms_FN,
+    )

data_preprocess/raster2graph/util/semantics_dict.py ADDED Viewed

	@@ -0,0 +1,45 @@

+semantics_dict = {
+    "living_room": 1,
+    "kitchen": 2,
+    "bedroom": 3,
+    "bathroom": 4,
+    "restroom": 5,
+    "balcony": 6,
+    "closet": 7,
+    "corridor": 8,
+    "washing_room": 9,
+    "PS": 10,
+    "outside": 11,
+    "wall": 12,
+    "no_type": 0,
+}
+semantics_dict_rev = {
+    0: "no_type",
+    1: "living_room",
+    2: "kitchen",
+    3: "bedroom",
+    4: "bathroom",
+    5: "restroom",
+    6: "balcony",
+    7: "closet",
+    8: "corridor",
+    9: "washing_room",
+    10: "PS",
+    11: "outside",
+    12: "wall",
+}
+semantics_dict_color = {
+    "living_room": (0, 0, 220),
+    "kitchen": (0, 220, 220),
+    "bedroom": (0, 220, 0),
+    "bathroom": (220, 220, 0),
+    "restroom": (220, 0, 0),
+    "balcony": (220, 0, 220),
+    "closet": (110, 0, 110),
+    "corridor": (110, 0, 0),
+    "washing_room": (0, 0, 110),
+    "PS": (0, 110, 110),
+    "outside": (0, 0, 0),
+    "wall": (110, 110, 110),
+    "no_type": (20, 20, 20),
+}

data_preprocess/stru3d/PointCloudReaderPanorama.py ADDED Viewed

	@@ -0,0 +1,253 @@

+import os
+import cv2
+import numpy as np
+import open3d as o3d
+NUM_SECTIONS = -1  # not referenced by the class below
+class PointCloudReaderPanorama:
+    """Build a coloured point cloud for one scene from its panorama depth/RGB renderings.
+
+    Every entry of ``<path>/2D_rendering`` is treated as one section holding
+    ``panorama/<resolution>/depth.png``, ``panorama/<resolution>/rgb_coldlight.png``
+    and ``panorama/camera_xyz.txt``.  The cloud is generated eagerly in
+    ``__init__`` and kept in ``self.point_cloud`` as a dict with ``"coords"``
+    and ``"colors"`` arrays.
+
+    NOTE(review): ``visualize`` and ``export_ply`` read
+    ``self.point_cloud["normals"]`` when ``generate_normal`` is true, but
+    ``generate_point_cloud`` never stores that key (its normal handling is
+    commented out), so that path raises ``KeyError`` as written.
+    """
+    def __init__(self, path, resolution="full", random_level=0, generate_color=False, generate_normal=False):
+        """Collect the per-section file paths, read camera centres and build the cloud.
+
+        Args:
+            path: Scene directory containing a ``2D_rendering`` folder.
+            resolution: Sub-folder of ``panorama`` to read images from.
+            random_level: Scale of the uniform noise added to every depth value.
+            generate_color: Whether ``visualize``/``export_ply`` emit colours.
+            generate_normal: Whether ``visualize``/``export_ply`` emit normals
+                (see the class-level note).
+        """
+        self.path = path
+        self.random_level = random_level
+        self.resolution = resolution
+        self.generate_color = generate_color
+        self.generate_normal = generate_normal
+        sections = [p for p in os.listdir(os.path.join(path, "2D_rendering"))]
+        self.depth_paths = [
+            os.path.join(*[path, "2D_rendering", p, "panorama", self.resolution, "depth.png"]) for p in sections
+        ]
+        self.rgb_paths = [
+            os.path.join(*[path, "2D_rendering", p, "panorama", self.resolution, "rgb_coldlight.png"])
+            for p in sections
+        ]
+        self.normal_paths = [
+            os.path.join(*[path, "2D_rendering", p, "panorama", self.resolution, "normal.png"]) for p in sections
+        ]
+        self.camera_paths = [os.path.join(*[path, "2D_rendering", p, "panorama", "camera_xyz.txt"]) for p in sections]
+        self.camera_centers = self.read_camera_center()
+        self.point_cloud = self.generate_point_cloud(
+            self.random_level, color=self.generate_color, normal=self.generate_normal
+        )
+    def read_camera_center(self):
+        """Return one 3-vector per section, parsed from the first line of its ``camera_xyz.txt``.
+
+        The line is split on single spaces and its first three values are used.
+        """
+        camera_centers = []
+        for i in range(len(self.camera_paths)):
+            with open(self.camera_paths[i], "r") as f:
+                line = f.readline()
+            center = list(map(float, line.strip().split(" ")))
+            camera_centers.append(np.asarray([center[0], center[1], center[2]]))
+        return camera_centers
+    def generate_point_cloud(self, random_level=0, color=False, normal=False):
+        """Back-project every depth pixel of every section into a shared 3-D point set.
+
+        Image rows map to an elevation angle running from +90 degrees downwards
+        and columns to an azimuth starting at -180 degrees.  Pixels whose
+        (noised) depth is not above 500 are dropped.  The result is quantised
+        (x/y to multiples of 10, z to multiples of 100) and de-duplicated.
+
+        Args:
+            random_level: Scale of the uniform [0, 1) noise added to each depth.
+            color: Not used in this method.
+            normal: Not used in this method.
+
+        Returns:
+            Dict with ``"coords"`` (N x 3) and ``"colors"`` (N x 3, scaled to 0-1).
+
+        NOTE(review): ``cv2.imread`` results are not checked, and if no pixel
+        passes the depth test ``coords`` is an empty 1-D array, so the column
+        slicing below would fail.
+        """
+        coords = []
+        colors = []
+        points = {}
+        # normals = []
+        # Getting Coordinates
+        for i in range(len(self.depth_paths)):
+            depth_img = cv2.imread(self.depth_paths[i], cv2.IMREAD_ANYDEPTH | cv2.IMREAD_ANYCOLOR)
+            # Angular step per pixel: rows span 180 degrees, columns 360 degrees.
+            x_tick = 180.0 / depth_img.shape[0]
+            y_tick = 360.0 / depth_img.shape[1]
+            rgb_img = cv2.imread(self.rgb_paths[i])
+            rgb_img = cv2.cvtColor(rgb_img, code=cv2.COLOR_BGR2RGB)
+            # normal_img = cv2.imread(self.normal_paths[i])
+            # Here x is the row index and y the column index.
+            for x in range(0, depth_img.shape[0]):
+                for y in range(0, depth_img.shape[1]):
+                    # need 90 - -09
+                    alpha = 90 - (x * x_tick)
+                    beta = y * y_tick - 180
+                    depth = depth_img[x, y] + np.random.random() * random_level
+                    if depth > 500.0:
+                        # Spherical -> Cartesian offset relative to the camera.
+                        z_offset = depth * np.sin(np.deg2rad(alpha))
+                        xy_offset = depth * np.cos(np.deg2rad(alpha))
+                        x_offset = xy_offset * np.sin(np.deg2rad(beta))
+                        y_offset = xy_offset * np.cos(np.deg2rad(beta))
+                        point = np.asarray([x_offset, y_offset, z_offset])
+                        coords.append(point + self.camera_centers[i])
+                        colors.append(rgb_img[x, y])
+                        # normals.append(normalize(normal_img[x, y].reshape(-1, 1)).ravel())
+        coords = np.asarray(coords)
+        colors = np.asarray(colors) / 255.0
+        # normals = np.asarray(normals)
+        # Quantise so that near-identical points collapse in np.unique below.
+        coords[:, :2] = np.round(coords[:, :2] / 10) * 10.0
+        coords[:, 2] = np.round(coords[:, 2] / 100) * 100.0
+        unique_coords, unique_ind = np.unique(coords, return_index=True, axis=0)
+        coords = coords[unique_ind]
+        colors = colors[unique_ind]
+        # normals = normals[unique_ind]
+        points["coords"] = coords
+        points["colors"] = colors
+        # points['normals'] = normals
+        print("Pointcloud size:", points["coords"].shape[0])
+        return points
+    def get_point_cloud(self):
+        """Return the dict built by ``generate_point_cloud``."""
+        return self.point_cloud
+    def generate_density(self, width=256, height=256):
+        """Project the cloud onto the x/y plane as a density image plus a normal map.
+
+        Args:
+            width: Output image width in pixels.
+            height: Output image height in pixels.
+
+        Returns:
+            ``(density, normals_map, normalization_dict)``: a ``height x width``
+            float32 image of per-pixel point counts divided by the maximum
+            count, a ``height x width x 3`` uint8 image of averaged normals,
+            and the padded min/max coordinates with the image resolution.
+        """
+        # Net effect of the next three lines: only the z column is negated.
+        ps = self.point_cloud["coords"] * -1
+        ps[:, 0] *= -1
+        ps[:, 1] *= -1
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(ps)
+        pcd.estimate_normals()
+        # zs = np.round(ps[:,2] / 100) * 100
+        # zs, zs_ind = np.unique(zs, return_index=True, axis=0)
+        # ps_ind = ps[:, :2] ==
+        # print("Generate density...")
+        image_res = np.array((width, height))
+        max_coords = np.max(ps, axis=0)
+        min_coords = np.min(ps, axis=0)
+        # Pad the bounding box by 10% of its extent on every side.
+        max_m_min = max_coords - min_coords
+        max_coords = max_coords + 0.1 * max_m_min
+        min_coords = min_coords - 0.1 * max_m_min
+        normalization_dict = {}
+        normalization_dict["min_coords"] = min_coords
+        normalization_dict["max_coords"] = max_coords
+        normalization_dict["image_res"] = image_res
+        # coordinates = np.round(points[:, :2] / max_coordinates[None,:2] * image_res[None])
+        # Map x/y into pixel coordinates, then clamp to the valid pixel range.
+        coordinates = np.round(
+            (ps[:, :2] - min_coords[None, :2]) / (max_coords[None, :2] - min_coords[None, :2]) * image_res[None]
+        )
+        coordinates = np.minimum(np.maximum(coordinates, np.zeros_like(image_res)), image_res - 1)
+        density = np.zeros((height, width), dtype=np.float32)
+        unique_coordinates, counts = np.unique(coordinates, return_counts=True, axis=0)
+        # print(np.unique(counts))
+        # counts = np.minimum(counts, 1e2)
+        unique_coordinates = unique_coordinates.astype(np.int32)
+        # Pixel coordinates are (x, y); the image is indexed [row=y, col=x].
+        density[unique_coordinates[:, 1], unique_coordinates[:, 0]] = counts
+        density = density / np.max(density)
+        # print(np.unique(density))
+        normals = np.array(pcd.normals)
+        normals_map = np.zeros((density.shape[0], density.shape[1], 3))
+        import time
+        start_time = time.time()
+        for i, unique_coord in enumerate(unique_coordinates):
+            # print(normals[unique_ind])
+            # Only every 10th point is considered when averaging normals per pixel.
+            # NOTE(review): a pixel hit by none of the subsampled points takes
+            # the mean of an empty selection - confirm this is acceptable.
+            normals_indcs = np.argwhere(np.all(coordinates[::10] == unique_coord, axis=1))[:, 0]
+            normals_map[unique_coordinates[i, 1], unique_coordinates[i, 0], :] = np.mean(
+                normals[::10][normals_indcs, :], axis=0
+            )
+        print("Time for normals: ", time.time() - start_time)
+        # Clipping to [0, 1] zeroes negative normal components before scaling to 0-255.
+        normals_map = (np.clip(normals_map, 0, 1) * 255).astype(np.uint8)
+        # plt.figure()
+        # plt.imshow(normals_map)
+        # plt.show()
+        return density, normals_map, normalization_dict
+    def visualize(self, export_path=None):
+        """Show the cloud in an Open3D window and optionally write it to ``export_path``.
+
+        Only points with z below 2000 are shown; y is flipped and all
+        coordinates are divided by 1000 (presumably mm to m - TODO confirm).
+        """
+        pcd = o3d.geometry.PointCloud()
+        points = self.point_cloud["coords"]
+        print(np.max(points, axis=0))
+        indices = np.where(points[:, 2] < 2000)
+        # Index-array selection returns a copy, so the in-place edits below
+        # do not touch self.point_cloud.
+        points = points[indices]
+        points[:, 1] *= -1
+        points[:, :] /= 1000
+        pcd.points = o3d.utility.Vector3dVector(points)
+        if self.generate_normal:
+            # NOTE(review): "normals" is never stored by generate_point_cloud.
+            normals = self.point_cloud["normals"]
+            normals = normals[indices]
+            pcd.normals = o3d.utility.Vector3dVector(normals)
+        if self.generate_color:
+            colors = self.point_cloud["colors"]
+            colors = colors[indices]
+            pcd.colors = o3d.utility.Vector3dVector(colors)
+        # wireframe_geo_list = visualize_wireframe(annos, vis=False, ret=True)
+        # o3d.visualization.draw_geometries([pcd] + wireframe_geo_list)
+        # o3d.visualization.draw_geometries([pcd])
+        pcd.estimate_normals()
+        # radii = 0.01
+        # mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_ball_pivoting(pcd, radii)
+        # alpha = 0.1
+        # tetra_mesh, pt_map = o3d.geometry.TetraMesh.create_from_point_cloud(pcd)
+        # mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_alpha_shape(pcd, alpha, tetra_mesh, pt_map)
+        o3d.visualization.draw_geometries([pcd])
+        if export_path is not None:
+            o3d.io.write_point_cloud(export_path, pcd)
+        # o3d.visualization.draw_geometries([pcd])
+    def export_ply(self, path):
+        """Write the point cloud to ``path`` as an ASCII PLY file.
+
+        The header declares ``x y z`` for every vertex, followed by
+        ``red green blue`` when ``generate_color`` is set and ``nx ny nz``
+        when ``generate_normal`` is set.  One space-separated line is then
+        written per point, with colours scaled back to 0-255 integers.
+        """
+        with open(path, "w") as f:
+            f.write("ply\n")
+            f.write("format ascii 1.0\n")
+            f.write("element vertex %d\n" % self.point_cloud["coords"].shape[0])
+            f.write("property float x\n")
+            f.write("property float y\n")
+            f.write("property float z\n")
+            if self.generate_color:
+                f.write("property uchar red\n")
+                f.write("property uchar green\n")
+                f.write("property uchar blue\n")
+            if self.generate_normal:
+                f.write("property float nx\n")
+                f.write("property float ny\n")
+                f.write("property float nz\n")
+            f.write("end_header\n")
+            for i in range(self.point_cloud["coords"].shape[0]):
+                normal = []
+                color = []
+                coord = self.point_cloud["coords"][i].tolist()
+                if self.generate_color:
+                    color = list(map(int, (self.point_cloud["colors"][i] * 255).tolist()))
+                if self.generate_normal:
+                    # NOTE(review): "normals" is never stored by generate_point_cloud.
+                    normal = self.point_cloud["normals"][i].tolist()
+                data = coord + color + normal
+                f.write(" ".join(list(map(str, data))) + "\n")

data_preprocess/stru3d/generate_coco_stru3d.py ADDED Viewed

	@@ -0,0 +1,199 @@

+import argparse
+import json
+import os
+import sys
+from stru3d_utils import generate_coco_dict, generate_density, normalize_annotations, parse_floor_plan_polys
+from tqdm import tqdm
+sys.path.append("../.")
+from common_utils import export_density, read_scene_pc
+### Note: Some scenes have missing/wrong annotations. These are the indices that you should additionally exclude
+### to be consistent with MonteFloor and HEAT:
+invalid_scenes_ids = [
+    76,
+    183,
+    335,
+    491,
+    663,
+    681,
+    703,
+    728,
+    865,
+    936,
+    985,
+    986,
+    1009,
+    1104,
+    1155,
+    1221,
+    1282,
+    1365,
+    1378,
+    1635,
+    1745,
+    1772,
+    1774,
+    1816,
+    1866,
+    2037,
+    2076,
+    2274,
+    2334,
+    2357,
+    2580,
+    2665,
+    2706,
+    2713,
+    2771,
+    2868,
+    3156,
+    3192,
+    3198,
+    3261,
+    3271,
+    3276,
+    3296,
+    3342,
+    3387,
+    3398,
+    3466,
+    3496,
+]
+type2id = {
+    "living room": 0,
+    "kitchen": 1,
+    "bedroom": 2,
+    "bathroom": 3,
+    "balcony": 4,
+    "corridor": 5,
+    "dining room": 6,
+    "study": 7,
+    "studio": 8,
+    "store room": 9,
+    "garden": 10,
+    "laundry room": 11,
+    "office": 12,
+    "basement": 13,
+    "garage": 14,
+    "undefined": 15,
+    "door": 16,
+    "window": 17,
+}
+def config():
+    a = argparse.ArgumentParser(description="Generate coco format data for Structured3D")
+    a.add_argument(
+        "--data_root", default="Structured3D_panorama", type=str, help="path to raw Structured3D_panorama folder"
+    )
+    a.add_argument("--output", default="coco_stru3d", type=str, help="path to output folder")
+    args = a.parse_args()
+    return args
+def main(args):
+    data_root = args.data_root
+    data_parts = os.listdir(data_root)
+    ### prepare
+    outFolder = args.output
+    if not os.path.exists(outFolder):
+        os.mkdir(outFolder)
+    annotation_outFolder = os.path.join(outFolder, "annotations")
+    if not os.path.exists(annotation_outFolder):
+        os.mkdir(annotation_outFolder)
+    train_img_folder = os.path.join(outFolder, "train")
+    val_img_folder = os.path.join(outFolder, "val")
+    test_img_folder = os.path.join(outFolder, "test")
+    for img_folder in [train_img_folder, val_img_folder, test_img_folder]:
+        if not os.path.exists(img_folder):
+            os.mkdir(img_folder)
+    coco_train_json_path = os.path.join(annotation_outFolder, "train.json")
+    coco_val_json_path = os.path.join(annotation_outFolder, "val.json")
+    coco_test_json_path = os.path.join(annotation_outFolder, "test.json")
+    coco_train_dict = {"images": [], "annotations": [], "categories": []}
+    coco_val_dict = {"images": [], "annotations": [], "categories": []}
+    coco_test_dict = {"images": [], "annotations": [], "categories": []}
+    for key, value in type2id.items():
+        type_dict = {"supercategory": "room", "id": value, "name": key}
+        coco_train_dict["categories"].append(type_dict)
+        coco_val_dict["categories"].append(type_dict)
+        coco_test_dict["categories"].append(type_dict)
+    ### begin processing
+    instance_id = 0
+    for part in tqdm(data_parts):
+        scenes = os.listdir(os.path.join(data_root, part, "Structured3D"))
+        for scene in tqdm(scenes):
+            scene_path = os.path.join(data_root, part, "Structured3D", scene)
+            scene_id = scene.split("_")[-1]
+            if int(scene_id) in invalid_scenes_ids:
+                print("skip {}".format(scene))
+                continue
+            # load pre-generated point cloud
+            ply_path = os.path.join(scene_path, "point_cloud.ply")
+            points = read_scene_pc(ply_path)
+            xyz = points[:, :3]
+            ### project point cloud to density map
+            density, normalization_dict = generate_density(xyz, width=256, height=256)
+            ### rescale raw annotations
+            normalized_annos = normalize_annotations(scene_path, normalization_dict)
+            ### prepare coco dict
+            img_id = int(scene_id)
+            img_dict = {}
+            img_dict["file_name"] = scene_id + ".png"
+            img_dict["id"] = img_id
+            img_dict["width"] = 256
+            img_dict["height"] = 256
+            ### parse annotations
+            polys = parse_floor_plan_polys(normalized_annos)
+            polygons_list = generate_coco_dict(normalized_annos, polys, instance_id, img_id, ignore_types=["outwall"])
+            instance_id += len(polygons_list)
+            ### train
+            if int(scene_id) < 3000:
+                coco_train_dict["images"].append(img_dict)
+                coco_train_dict["annotations"] += polygons_list
+                export_density(density, train_img_folder, scene_id)
+            ### val
+            elif int(scene_id) >= 3000 and int(scene_id) < 3250:
+                coco_val_dict["images"].append(img_dict)
+                coco_val_dict["annotations"] += polygons_list
+                export_density(density, val_img_folder, scene_id)
+            ### test
+            else:
+                coco_test_dict["images"].append(img_dict)
+                coco_test_dict["annotations"] += polygons_list
+                export_density(density, test_img_folder, scene_id)
+            print(scene_id)
+    with open(coco_train_json_path, "w") as f:
+        json.dump(coco_train_dict, f)
+    with open(coco_val_json_path, "w") as f:
+        json.dump(coco_val_dict, f)
+    with open(coco_test_json_path, "w") as f:
+        json.dump(coco_test_dict, f)
+if __name__ == "__main__":
+    main(config())

data_preprocess/stru3d/generate_point_cloud_stru3d.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import argparse
+import os
+from PointCloudReaderPanorama import PointCloudReaderPanorama
+from tqdm import tqdm
def config():
    """Parse the command-line options of the point-cloud generation script.

    Returns:
        argparse.Namespace: parsed arguments; ``data_root`` is the path to the
        raw Structured3D_panorama folder.
    """
    parser = argparse.ArgumentParser(description="Generate point cloud for Structured3D")
    parser.add_argument(
        "--data_root",
        type=str,
        default="Structured3D_panorama",
        help="path to raw Structured3D_panorama folder",
    )
    return parser.parse_args()
def main(args):
    """Export a ``point_cloud.ply`` file for every scene under ``args.data_root``.

    Every entry of ``data_root`` is treated as a dataset part containing a
    ``Structured3D`` folder, and every entry of that folder as one scene.

    Args:
        args: namespace with a ``data_root`` attribute (see ``config``).
    """
    print("Creating point cloud from perspective views...")
    root = args.data_root
    for part in tqdm(os.listdir(root)):
        scenes_dir = os.path.join(root, part, "Structured3D")
        for scene in tqdm(os.listdir(scenes_dir)):
            scene_path = os.path.join(scenes_dir, scene)
            reader = PointCloudReaderPanorama(scene_path, random_level=0, generate_color=True, generate_normal=False)
            # The point cloud is written next to the scene's own data.
            reader.export_ply(os.path.join(scene_path, "point_cloud.ply"))


if __name__ == "__main__":
    main(config())

data_preprocess/stru3d/stru3d_utils.py ADDED Viewed

	@@ -0,0 +1,244 @@

+"""
+Utilities adapted from the Structured3D code base.
+Reference: https://github.com/bertjiazheng/Structured3D
+"""
+import json
+import os
+import sys
+import numpy as np
+from shapely.geometry import Polygon
+sys.path.append("../data_preprocess")
+from common_utils import resort_corners
# Semantic type name -> COCO category id. The ids follow the order of the
# names below: room types first (0-15), then "door" (16) and "window" (17).
type2id = {
    label: category_id
    for category_id, label in enumerate(
        (
            "living room",
            "kitchen",
            "bedroom",
            "bathroom",
            "balcony",
            "corridor",
            "dining room",
            "study",
            "studio",
            "store room",
            "garden",
            "laundry room",
            "office",
            "basement",
            "garage",
            "undefined",
            "door",
            "window",
        )
    )
}
def generate_density(point_cloud, width=256, height=256):
    """Rasterise a point cloud into a normalised 2D density image.

    Args:
        point_cloud: 2D array, one point per row; the first two columns are
            used as the image-plane coordinates.
        width: output image width in pixels.
        height: output image height in pixels.

    Returns:
        tuple: ``(density, normalization_dict)`` where ``density`` is a
        ``(height, width)`` float32 array scaled so its maximum is 1, and
        ``normalization_dict`` holds the padded ``min_coords`` / ``max_coords``
        and ``image_res`` needed to map other points into the same pixel grid.
    """
    # Negate everything, then undo it for the first two columns: x and y keep
    # their sign while every remaining column ends up flipped.
    pts = point_cloud * -1
    pts[:, :2] *= -1

    image_res = np.array((width, height))

    # Bounding box of the cloud, enlarged by 10% of its extent on every side.
    upper = np.max(pts, axis=0)
    lower = np.min(pts, axis=0)
    extent = upper - lower
    upper = upper + 0.1 * extent
    lower = lower - 0.1 * extent

    normalization_dict = {
        "min_coords": lower,
        "max_coords": upper,
        "image_res": image_res,
    }

    # Map x/y into pixel coordinates and keep them inside the image.
    pixels = np.round((pts[:, :2] - lower[None, :2]) / (upper[None, :2] - lower[None, :2]) * image_res[None])
    pixels = np.clip(pixels, 0, image_res - 1)

    # Count how many points land in each pixel.
    cells, hits = np.unique(pixels, return_counts=True, axis=0)
    cells = cells.astype(np.int32)
    density = np.zeros((height, width), dtype=np.float32)
    density[cells[:, 1], cells[:, 0]] = hits

    return density / np.max(density), normalization_dict
def normalize_point(point, normalization_dict):
    """Map the first two coordinates of ``point`` into pixel space, in place.

    Args:
        point: mutable sequence whose first two entries are x and y; any
            further entries are left untouched.
        normalization_dict: dict with ``min_coords``, ``max_coords`` and
            ``image_res`` as produced by ``generate_density``.

    Returns:
        The same ``point`` object, with its first two entries replaced by
        rounded pixel coordinates clamped to ``[0, image_res - 1]``.
    """
    lower = normalization_dict["min_coords"][:2]
    upper = normalization_dict["max_coords"][:2]
    image_res = normalization_dict["image_res"]

    scaled = np.round((point[:2] - lower) / (upper - lower) * image_res)
    point[:2] = np.clip(scaled, 0, image_res - 1).tolist()
    return point
def normalize_annotations(scene_path, normalization_dict):
    """Load ``annotation_3d.json`` of a scene and normalise its points.

    The ``point`` of every entry in ``lines`` and the ``coordinate`` of every
    entry in ``junctions`` are passed through ``normalize_point``.

    Args:
        scene_path: folder containing ``annotation_3d.json``.
        normalization_dict: normalisation parameters from ``generate_density``.

    Returns:
        dict: the parsed annotation JSON with normalised points.
    """
    with open(os.path.join(scene_path, "annotation_3d.json"), "r") as f:
        annos = json.load(f)

    # (collection key, field holding the point) pairs, processed in this order.
    for group, field in (("lines", "point"), ("junctions", "coordinate")):
        for entry in annos[group]:
            entry[field] = normalize_point(entry[field], normalization_dict)

    return annos
def parse_floor_plan_polys(annos):
    """Extract one vertex loop per floor plane of a Structured3D annotation.

    Args:
        annos: annotation dict with ``semantics``, ``planes``,
            ``planeLineMatrix`` and ``lineJunctionMatrix`` entries.

    Returns:
        list: ``[junction_ids, semantic_type]`` pairs, one per plane whose
        type is ``"floor"``. ``junction_ids`` is the first loop returned by
        ``convert_lines_to_vertices`` for that plane.
    """
    # Every floor plane, tagged with the type of the semantic that references it.
    floor_planes = [
        {"planeID": plane_id, "type": semantic["type"]}
        for semantic in annos["semantics"]
        for plane_id in semantic["planeID"]
        if annos["planes"][plane_id]["type"] == "floor"
    ]

    # The previous implementation also collected the line ids of door/window
    # planes here, but never used the result; that dead computation is gone.

    polygons = []
    for plane in floor_planes:
        # Lines bounding this plane, then the two junctions at the ends of each line.
        line_ids = np.where(np.array(annos["planeLineMatrix"][plane["planeID"]]))[0].tolist()
        junction_pairs = [
            np.where(np.array(annos["lineJunctionMatrix"][line_id]))[0].tolist() for line_id in line_ids
        ]
        loops = convert_lines_to_vertices(junction_pairs)
        polygons.append([loops[0], plane["type"]])
    return polygons
def convert_lines_to_vertices(lines):
    """Chain line segments into closed vertex loops.

    Args:
        lines: sequence of ``[junction_a, junction_b]`` pairs.

    Returns:
        list: one list of junction ids per closed loop, in traversal order.

    Note:
        A chain that cannot be continued (no remaining segment touches its
        last vertex) raises ``IndexError``.
    """
    remaining = np.array(lines)
    closed_loops = []
    current = None

    while len(remaining) != 0:
        if current is None:
            # Start a new loop from the first unused segment.
            current = remaining[0].tolist()
            remaining = np.delete(remaining, 0, 0)

        # Segments touching the loop's last vertex; follow the first match to
        # its other end, and drop every matched segment.
        rows, cols = np.where(remaining == current[-1])
        next_vertex = remaining[rows[0], 1 - cols[0]]
        remaining = np.delete(remaining, rows, 0)

        if next_vertex in current:
            # Back at a vertex already visited: the loop is closed.
            closed_loops.append(current)
            current = None
            continue
        current.append(next_vertex)

    return closed_loops
def generate_coco_dict(annos, polygons, curr_instance_id, curr_img_id, ignore_types, image_size=256):
    """Convert floor-plan polygons of one image into COCO annotation dicts.

    Args:
        annos: annotation dict; the first two entries of every
            ``junctions[i]["coordinate"]`` are used as 2D vertex positions.
        polygons: iterable of ``(junction_ids, type_name)`` pairs.
        curr_instance_id: id assigned to the first emitted annotation; it is
            incremented by one for each following annotation.
        curr_img_id: value written to ``image_id`` of every annotation.
        ignore_types: type names that are skipped entirely.
        image_size: side length of the (square) image in pixels; bounding
            boxes are clamped to ``[0, image_size - 1]``. Defaults to 256.

    Returns:
        list: COCO annotation dicts. Rooms with area below 100 and
        doors/windows with area below 1 are dropped. Doors and windows are
        reduced to a two-point line along their longer axis.
    """
    junctions = np.array([junc["coordinate"][:2] for junc in annos["junctions"]])
    bound_pad = 2  # bounding boxes are made slightly wider than the polygon
    coco_annotation_dict_list = []

    for polygon, poly_type in polygons:
        if poly_type in ignore_types:
            continue

        is_opening = poly_type in ("door", "window")
        polygon = junctions[np.array(polygon)]
        poly_shapely = Polygon(polygon)
        area = poly_shapely.area

        # Openings are thin, so they only need a token area; rooms need a real footprint.
        if area < (1 if is_opening else 100):
            continue

        rectangle_shapely = poly_shapely.envelope

        if is_opening:
            # A door/window quad is collapsed to the segment joining the
            # midpoints of its two more distant opposite edges.
            assert polygon.shape[0] == 4
            midp_1 = (polygon[0] + polygon[1]) / 2
            midp_2 = (polygon[1] + polygon[2]) / 2
            midp_3 = (polygon[2] + polygon[3]) / 2
            midp_4 = (polygon[3] + polygon[0]) / 2
            dist_1_3 = np.square(midp_1 - midp_3).sum()
            dist_2_4 = np.square(midp_2 - midp_4).sum()
            # np.vstack replaces np.row_stack, which is deprecated since NumPy 2.0.
            if dist_1_3 > dist_2_4:
                polygon = np.vstack([midp_1, midp_3])
            else:
                polygon = np.vstack([midp_2, midp_4])

        coco_seg_poly = []
        for p in resort_corners(polygon):
            coco_seg_poly += list(p)

        # Padded bounding box of the original polygon, clamped to the image.
        bb_x, bb_y = rectangle_shapely.exterior.xy
        bb_x_min = np.maximum(np.min(bb_x) - bound_pad, 0)
        bb_y_min = np.maximum(np.min(bb_y) - bound_pad, 0)
        bb_x_max = np.minimum(np.max(bb_x) + bound_pad, image_size - 1)
        bb_y_max = np.minimum(np.max(bb_y) + bound_pad, image_size - 1)
        coco_bb = [bb_x_min, bb_y_min, bb_x_max - bb_x_min, bb_y_max - bb_y_min]

        coco_annotation_dict_list.append(
            {
                "segmentation": [coco_seg_poly],
                "area": area,
                "iscrowd": 0,
                "image_id": curr_img_id,
                "bbox": coco_bb,
                "category_id": type2id[poly_type],
                "id": curr_instance_id,
            }
        )
        curr_instance_id += 1

    return coco_annotation_dict_list

data_preprocess/tools/plot_data.sh ADDED Viewed

	@@ -0,0 +1,60 @@

#!/usr/bin/env bash
# Render ground-truth plots for the Structured3D, Raster2Graph and CubiCasa5k
# COCO-style datasets with plot_floor.py.
#
# Additional useful arguments:
# --crop_white_space: remove redundant whitespace from the rendering
# --one_color: use single color for every room (i.e. yellow)
# --compute_stats: compute statistics of the dataset (e.g. max_num_pts, max_num_polys)
# and plot histogram for counting number of Points, Rooms, Corners
# --drop_wd: disable Window & Door in the plots
# --image_scale: adjust rendering resolution of the plots

SPLIT=test

# Structured3D (black-and-white renderings)
stru3d_args=(
    --dataset_name=stru3d
    --dataset_root=data/coco_s3d_bw/
    --eval_set="${SPLIT}"
    --output_dir="data_plots/output_gt_s3dbw/${SPLIT}"
    --semantic_classes=19
    --input_channels 3
    --disable_image_transform
    --poly2seq
    --image_size 256
    --image_scale 1
    --compute_stats
    --plot_gt
    --plot_gt_image
    --plot_polys
    --plot_density
)
python plot_floor.py "${stru3d_args[@]}"

# Raster2Graph
r2g_args=(
    --dataset_name=r2g
    --dataset_root=data/R2G_hr_dataset_processed_v1/
    --eval_set="${SPLIT}"
    --output_dir="output_gt_r2g/${SPLIT}"
    --semantic_classes=13
    --input_channels 3
    --poly2seq
    --disable_image_transform
    --image_size 256
    --image_scale 1
    --compute_stats
    --plot_gt
    --plot_polys
    --plot_density
)
python plot_floor.py "${r2g_args[@]}"

# CubiCasa5k
cubicasa_args=(
    --dataset_name=cubicasa
    --dataset_root=data/coco_cubicasa5k_nowalls_v4-1_refined
    --eval_set="${SPLIT}"
    --output_dir="data_plots/output_gt_cc5k/${SPLIT}"
    --semantic_classes=12
    --input_channels 3
    --disable_image_transform
    --poly2seq
    --image_size 256
    --image_scale 1
    --compute_stats
    --plot_gt
    --plot_polys
    --plot_density
)
python plot_floor.py "${cubicasa_args[@]}"

data_preprocess/tools/run_cc5k.sh ADDED Viewed

	@@ -0,0 +1,15 @@

# Create the COCO-style dataset for CubiCasa5k
python -m data_preprocess.cubicasa5k.create_coco_cc5k --data_root=data/cubicasa5k/ \
    --output=data/coco_cubicasa5k_nowalls_v4/ \
    --disable_wd2line

# Split samples that contain more than one floor plan into separate samples.
# floorplan_extraction is a sibling module of create_coco_cc5k, not a submodule of it.
python -m data_preprocess.cubicasa5k.floorplan_extraction \
    --data_root data/coco_cubicasa5k_nowalls_v4/ \
    --output data/coco_cubicasa5k_nowalls_v4-1_refined/

# Merge individual JSONs into a single JSON file per split (train/val/test).
# This must be done after floorplan_extraction.py
python -m data_preprocess.cubicasa5k.combine_json \
    --input data/coco_cubicasa5k_nowalls_v4-1_refined/ \
    --output data/coco_cubicasa5k_nowalls_v4-1_refined/annotations/

data_preprocess/tools/run_r2g.sh ADDED Viewed

	@@ -0,0 +1,12 @@

# Preprocess the raw Raster2Graph high-resolution dataset
python -m data_preprocess.raster2graph.image_process --data_root=data/R2G_hr_dataset/

# Convert to a COCO-style dataset
python -m data_preprocess.raster2graph.convert_to_coco --dataset_path data/R2G_hr_dataset/ --output_dir data/R2G_hr_dataset_processed/

# Combine JSON files into a single JSON file per split.
# No trailing backslash after the last argument: otherwise the cleanup command
# below would be swallowed as extra arguments of this python invocation.
python -m data_preprocess.raster2graph.combine_json \
    --input data/R2G_hr_dataset_processed/ \
    --output data/R2G_hr_dataset_processed_v1/

# Remove the intermediate per-sample output
rm -rf data/R2G_hr_dataset_processed/

data_preprocess/tools/run_s3d.sh ADDED Viewed

	@@ -0,0 +1,22 @@

## Assumes the Structured3D density dataset has already been downloaded to data/coco_s3d.
## Renders every split with plot_floor.py into data/coco_s3d_bw.
DATA=data/coco_s3d
BW_ROOT=data/coco_s3d_bw

for subset in train val test; do
    python plot_floor.py \
        --dataset_name=stru3d \
        --dataset_root="${DATA}" \
        --eval_set="${subset}" \
        --output_dir="${BW_ROOT}/${subset}/" \
        --semantic_classes=19 \
        --input_channels 3 \
        --disable_image_transform \
        --poly2seq \
        --image_size 256 \
        --image_scale 1 \
        --plot_gt \
        --is_bw \
        --plot_engine matplotlib
done

# Reuse the annotations of the source dataset
cp -r "${DATA}/annotations" "${BW_ROOT}/"

data_preprocess/tools/run_waffle.sh ADDED Viewed

	@@ -0,0 +1,3 @@

# Convert the WAFFLE benchmark set into a COCO-style dataset.
WAFFLE_ROOT=data/waffle/benchmark/
WAFFLE_OUT=data/waffle_benchmark_processed/

python -m data_preprocess.waffle.create_coco_waffle_benchmark --data_root "${WAFFLE_ROOT}" --output "${WAFFLE_OUT}"

data_preprocess/waffle/create_coco_waffle_benchmark.py ADDED Viewed

	@@ -0,0 +1,290 @@

+import argparse
+import json
+import os
+import sys
+from glob import glob
+from pathlib import Path
+import cv2
+import numpy as np
+from PIL import Image
+from shapely.geometry import Polygon
+sys.path.append(str(Path(__file__).resolve().parent.parent))
+from common_utils import resort_corners
def draw_polygon_on_image(image, polygons, class_to_color):
    """Fill each polygon on ``image`` with the colour of its class.

    Args:
        image (numpy.ndarray): image to draw on; it is modified in place.
        polygons (list): ``(points, class_id)`` pairs, where ``points`` can be
            reshaped into an ``(N, 2)`` array of x/y coordinates.
        class_to_color (dict): class id -> RGB colour.

    Returns:
        numpy.ndarray: the same ``image`` with the polygons drawn.
    """
    for flat_points, label in polygons:
        vertices = np.array(flat_points, dtype=np.int32).reshape(-1, 2)
        rgb = class_to_color[label]
        # OpenCV expects the channels in BGR order.
        bgr = tuple(rgb[channel] for channel in (2, 1, 0))
        cv2.fillPoly(image, [vertices], bgr)
    return image
def fill_mask(segmentation_mask):
    """Build a binary foreground mask from a class-index mask.

    For every class index other than 0 the external contours of that class
    are filled with 255 in a uint8 mask shaped like the input.

    Args:
        segmentation_mask (numpy.ndarray): class-index mask; 0 is background.

    Returns:
        numpy.ndarray: uint8 mask with filled foreground regions set to 255.
    """
    filled = np.zeros_like(segmentation_mask, dtype=np.uint8)
    foreground_classes = [label for label in np.unique(segmentation_mask) if label != 0]
    for label in foreground_classes:
        class_pixels = (segmentation_mask == label).astype(np.uint8)
        outlines, _ = cv2.findContours(class_pixels, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # thickness=cv2.FILLED paints the interior as well, closing any holes.
        cv2.drawContours(filled, outlines, -1, 255, thickness=cv2.FILLED)
    return filled
def to_bw_image(input_image):
    """Convert a BGR image into a black-and-white image.

    The image is converted to grayscale and thresholded at 127: brighter
    pixels become 255, all others 0.

    Args:
        input_image (numpy.ndarray): image in BGR channel order.

    Returns:
        numpy.ndarray: single-channel thresholded image.
    """
    grayscale = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY)
    thresholded = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)
    # cv2.threshold returns (used_threshold, image); only the image is needed.
    return thresholded[1]
def create_coco_bounding_box(bb_x, bb_y, image_width, image_height, bound_pad=2):
    """Build a padded COCO ``[x, y, width, height]`` box from corner coordinates.

    Args:
        bb_x: x coordinates of the box corners.
        bb_y: y coordinates of the box corners.
        image_width: image width; the right edge is clamped to ``image_width - 1``.
        image_height: image height; the bottom edge is clamped to ``image_height - 1``.
        bound_pad: padding added on every side before clamping.

    Returns:
        list: ``[x_min, y_min, width, height]`` of the padded, clamped box.
    """
    left = np.maximum(np.min(bb_x) - bound_pad, 0)
    top = np.maximum(np.min(bb_y) - bound_pad, 0)
    right = np.minimum(np.max(bb_x) + bound_pad, image_width - 1)
    bottom = np.minimum(np.max(bb_y) + bound_pad, image_height - 1)
    return [left, top, right - left, bottom - top]
def prepare_dict(categories_dict):
    """Create an empty COCO dataset dict with its category list filled in.

    Args:
        categories_dict (dict): category name -> category id.

    Returns:
        dict: ``{"images": [], "annotations": [], "categories": [...]}`` with
        one ``{"supercategory": "room", "id", "name"}`` entry per category.
    """
    categories = [
        {"supercategory": "room", "id": class_id, "name": class_name}
        for class_name, class_id in categories_dict.items()
    ]
    return {"images": [], "annotations": [], "categories": categories}
def convert_numpy_to_python(obj):
    """Turn NumPy scalars and arrays into plain Python values.

    Intended as the ``default`` hook of ``json.dump``.

    Args:
        obj: any object.

    Returns:
        ``int`` for NumPy integers, ``float`` for NumPy floats, a nested list
        for arrays, and ``obj`` itself for everything else.
    """
    for numpy_type, convert in ((np.integer, int), (np.floating, float)):
        if isinstance(obj, numpy_type):
            return convert(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj
def config():
    """Parse the command-line options of the WAFFLE benchmark converter.

    Returns:
        argparse.Namespace: ``data_root`` (WAFFLE benchmark folder) and
        ``output`` (destination folder).
    """
    parser = argparse.ArgumentParser(description="Generate coco format data for WAFFLE BENCHMARK SET")
    parser.add_argument(
        "--data_root",
        type=str,
        default="data/waffle/benchmark/",
        help="path to WAFFLE BENCHMARK folder",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="data/waffle_benchmark_processed/",
        help="path to output folder",
    )
    return parser.parse_args()
if __name__ == "__main__":
    # RGB colour legend of the WAFFLE segmentation PNGs (kept for reference; not read below).
    LABEL_NOTATIONS = {
        "Background": (0, 0, 0),  # Black
        "Interior": (255, 255, 255),  # White
        "Walls": (255, 0, 0),  # Red
        "Doors": (0, 0, 255),  # Blue
        "Windows": (0, 255, 255),  # Cyan
    }
    # Category names written to the COCO "categories" list.
    # NOTE(review): these ids (0, 1, 3, 4) are the pre-remap mask indices, while the
    # annotations below carry the remapped ids of NEW_CLASS_MAPPING (0, 1, 2).
    # Confirm which numbering the consumers of test.json expect before changing it.
    CLASS2INDEX = {
        "Background": 0,  # Black
        "Interior": 1,  # White
        # "Walls": 2,          # Red
        "Doors": 3,  # Blue
        "Windows": 4,  # Cyan
    }
    # RGB value in the label PNG -> mask class index
    COLOR_TO_CLASS = {
        (0, 0, 0): 0,  # Background
        (255, 255, 255): 1,  # Interior
        (255, 0, 0): 2,  # Walls
        (0, 0, 255): 3,  # Doors
        (0, 255, 255): 4,  # Windows
    }
    # Mask class index -> annotation category id; classes missing here get no annotations.
    NEW_CLASS_MAPPING = {
        1: 0,
        3: 1,
        4: 2,
    }
    # Annotation category id -> RGB colour used for the debug rendering
    CLASS_TO_COLOR = {
        0: (255, 255, 255),  # Interior
        1: (0, 0, 255),  # Doors
        2: (0, 255, 255),  # Windows
    }
    # Remapped category ids of doors and windows. The area filter below must use
    # these, because class_index has already been remapped when it is applied.
    DOOR_WINDOW_IDS = (NEW_CLASS_MAPPING[3], NEW_CLASS_MAPPING[4])

    args = config()
    root = args.data_root
    image_dir = f"{root}/pngs"
    label_dir = f"{root}/segmented_descrete_pngs"
    input_paths = sorted(glob(f"{label_dir}/*.png"))

    output_dir = args.output
    output_aux_dir = f"{output_dir}/aux"
    output_image_dir = f"{output_dir}/test/"
    output_annot_dir = f"{output_dir}/annotations/"
    # Plain-text "<original name> <new name>" lines, despite the .json suffix.
    fn_mapping_log = f"{output_annot_dir}/test_image_id_mapping.json"
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(output_aux_dir, exist_ok=True)
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_annot_dir, exist_ok=True)

    instance_count = 0
    save_dict = prepare_dict(CLASS2INDEX)
    output_mappings = []

    for i, path in enumerate(input_paths):
        mask = np.array(Image.open(path).convert("RGB"))
        fn = os.path.basename(path).replace("_seg_colors.png", "")
        new_fn = str(i).zfill(5)
        image = Image.open(os.path.join(image_dir, f"{fn}.png")).convert("RGB")
        image_width, image_height = image.size

        # Class-index mask with the same height and width as the label image
        segmentation_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)

        img_id = i
        img_dict = {
            "file_name": str(img_id).zfill(5) + ".png",
            "id": img_id,
            "width": image_width,
            "height": image_height,
        }

        output_polygons = []
        coco_annotation_dict_list = []

        for color, class_index in COLOR_TO_CLASS.items():
            # Pixels whose RGB value matches this class colour exactly
            color_mask = (mask == color).all(axis=-1)
            color_mask_uint8 = color_mask.astype(np.uint8)
            segmentation_mask[color_mask] = class_index

            if class_index not in NEW_CLASS_MAPPING:
                continue
            class_index = NEW_CLASS_MAPPING[class_index]

            # External contours of this class, simplified to polygons
            contours, _ = cv2.findContours(color_mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            polygons = []
            for cnt in contours:
                peri = cv2.arcLength(cnt, True)
                approx = cv2.approxPolyDP(cnt, 0.001 * peri, True)
                polygons.append(approx.reshape(-1, 2))

            for polygon in polygons:
                if polygon.shape[0] < 3:
                    continue

                shapely_polygon = Polygon(polygon)
                area = shapely_polygon.area
                # Doors/windows are thin, so they only need a token area;
                # interior regions must cover at least 100 pixels.
                min_area = 1 if class_index in DOOR_WINDOW_IDS else 100
                if area < min_area:
                    continue

                # The envelope is taken only after the area filter, so it is
                # never requested for a degenerate (zero-area) shape.
                bb_x, bb_y = shapely_polygon.envelope.exterior.xy
                coco_bb = create_coco_bounding_box(bb_x, bb_y, image_width, image_height, bound_pad=2)

                coco_seg_poly = []
                for p in resort_corners(polygon):
                    coco_seg_poly += list(p)

                coco_annotation_dict_list.append(
                    {
                        "segmentation": [coco_seg_poly],
                        "area": area,
                        "iscrowd": 0,  # COCO key; was misspelled "iscrow"
                        "image_id": i,
                        "bbox": coco_bb,
                        "category_id": class_index,
                        "id": instance_count,
                    }
                )
                instance_count += 1
                output_polygons.append([coco_seg_poly, class_index])

        save_dict["images"].append(img_dict)
        save_dict["annotations"] += coco_annotation_dict_list

        # Print the class indices found in the mask as a quick sanity check
        print(path)
        print(np.unique(segmentation_mask))

        # Foreground mask (all non-background classes, holes filled), at image resolution
        filled_mask = fill_mask(segmentation_mask)
        clean_image = np.array(image)
        filled_mask_resized = cv2.resize(
            filled_mask, (clean_image.shape[1], clean_image.shape[0]), interpolation=cv2.INTER_NEAREST
        )
        cv2.imwrite(f"{output_aux_dir}/{fn}_fg_mask.png", filled_mask_resized)

        # Keep the image only inside the foreground; paint everything else white
        clean_image = clean_image * np.array(filled_mask_resized[:, :, np.newaxis] / 255.0).astype(bool)
        clean_image[filled_mask_resized == 0] = 255
        clean_image = cv2.cvtColor(clean_image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f"{output_image_dir}/{new_fn}.png", clean_image)

        # Debug rendering of the extracted polygons
        image_with_polygons = draw_polygon_on_image(np.zeros_like(clean_image), output_polygons, CLASS_TO_COLOR)
        cv2.imwrite(f"{output_aux_dir}/{fn}_polylines.png", image_with_polygons)

        output_mappings.append(f"{fn} {new_fn}")

    with open(fn_mapping_log, "w") as f:
        for mapping in output_mappings:
            f.write(f"{mapping}\n")

    # Serialize save_dict to JSON; NumPy values are converted by the default hook
    json_path = f"{output_annot_dir}/test.json"
    with open(json_path, "w") as f:
        json.dump(save_dict, f, default=convert_numpy_to_python)

datasets/__init__.py ADDED Viewed

	@@ -0,0 +1,67 @@

+from .poly_data import build as build_poly
def build_dataset(image_set, args):
    """Build the polygon dataset selected by ``args.dataset_name``.

    Args:
        image_set: split identifier forwarded to the dataset builder.
        args: namespace with at least a ``dataset_name`` attribute.

    Returns:
        The dataset returned by ``build_poly(image_set, args)``.

    Raises:
        ValueError: if ``args.dataset_name`` is not one of ``stru3d``,
            ``cubicasa``, ``waffle`` or ``r2g``.
    """
    if args.dataset_name not in ("stru3d", "cubicasa", "waffle", "r2g"):
        raise ValueError(f"dataset {args.dataset_name} not supported")
    print(f"Build {args.dataset_name} {image_set} dataset")
    return build_poly(image_set, args)
def get_dataset_class_labels(dataset_name):
    """Return the semantic label table of a dataset and its inverse.

    Args:
        dataset_name: ``"stru3d"``, ``"cubicasa"`` or ``"r2g"``.

    Returns:
        tuple: ``(semantics_label, id2class)`` where ``id2class`` is
        ``semantics_label`` with keys and values swapped. Both are ``None``
        for any other dataset name.

    Note:
        NOTE(review): the ``stru3d`` table maps id -> name, whereas the
        ``cubicasa`` and ``r2g`` tables map name -> id, so for ``stru3d`` the
        second return value maps name -> id. Kept as is; confirm against the
        callers before unifying.
    """
    if dataset_name == "stru3d":
        names = (
            "Living Room",
            "Kitchen",
            "Bedroom",
            "Bathroom",
            "Balcony",
            "Corridor",
            "Dining room",
            "Study",
            "Studio",
            "Store room",
            "Garden",
            "Laundry room",
            "Office",
            "Basement",
            "Garage",
            "Misc.",
            "Door",
            "Window",
        )
        semantics_label = dict(enumerate(names))
    elif dataset_name == "cubicasa":
        names = (
            "Outdoor",
            "Kitchen",
            "Living Room",
            "Bed Room",
            "Bath",
            "Entry",
            "Storage",
            "Garage",
            "Undefined",
            "Window",
            "Door",
        )
        semantics_label = {name: index for index, name in enumerate(names)}
    elif dataset_name == "r2g":
        names = (
            "unknown",
            "living_room",
            "kitchen",
            "bedroom",
            "bathroom",
            "restroom",
            "balcony",
            "closet",
            "corridor",
            "washing_room",
            "PS",
            "outside",
        )
        semantics_label = {name: index for index, name in enumerate(names)}
    else:
        return None, None

    inverse = {value: key for key, value in semantics_label.items()}
    return semantics_label, inverse

datasets/data_utils.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import matplotlib.pyplot as plt
+import numpy as np
def compute_centroid(polygon):
    """Return the mean of a polygon's vertices as an ``(x, y)`` tuple.

    This is the average of the vertex coordinates, not the area-weighted
    centroid of the shape.
    """
    vertices = np.array(polygon)
    xs, ys = vertices[:, 0], vertices[:, 1]
    return (np.mean(xs), np.mean(ys))
def get_top_left(polygon):
    """Return the vertex with the smallest y, breaking ties by smallest x."""

    def row_major(point):
        # Compare by y first, then by x.
        return (point[1], point[0])

    return min(polygon, key=row_major)
def sort_polygons(polygons, tolerance=20, reverse=False):
    """Order polygons row by row, and left to right within each row.

    Polygons are compared by their top-left vertex. Going through them by
    increasing y, each polygon joins the first existing row whose first member
    lies within ``tolerance`` in y; otherwise it starts a new row. Rows are
    then sorted by x and concatenated.

    Args:
        polygons: list of polygons, each a sequence of ``(x, y)`` points.
        tolerance: maximum y distance to a row's first member for a polygon
            to be placed in that row.
        reverse: if true, the final order is reversed.

    Returns:
        tuple: ``(sorted_polygons, sorted_indices)``; the indices refer to
        positions in the input list.
    """
    anchors = [get_top_left(poly) for poly in polygons]

    # Polygon indices from top to bottom (stable for equal y).
    top_down = sorted(range(len(polygons)), key=lambda k: anchors[k][1])

    # Each row is a list of polygon indices; its first entry is the reference.
    rows = []
    for k in top_down:
        y = anchors[k][1]
        for row in rows:
            if abs(anchors[row[0]][1] - y) <= tolerance:
                row.append(k)
                break
        else:
            rows.append([k])

    sorted_indices = []
    for row in rows:
        sorted_indices.extend(sorted(row, key=lambda k: anchors[k][0]))

    if reverse:
        sorted_indices.reverse()

    return [polygons[k] for k in sorted_indices], sorted_indices
def plot_polygons(polygons, save_path):
    """Draw the polygons with index labels and save the figure.

    Each polygon is filled semi-transparently and labelled ``C<n>`` (1-based)
    at the mean of its vertices.

    Args:
        polygons: list of polygons, each a sequence of ``(x, y)`` points.
        save_path: destination passed to ``plt.savefig``.
    """
    fig = plt.figure(figsize=(6, 6))
    try:
        for i, poly in enumerate(polygons):
            poly = np.array(poly)
            plt.fill(poly[:, 0], poly[:, 1], alpha=0.5, label=f"Polygon {i + 1}")
            centroid = compute_centroid(poly)
            plt.text(centroid[0], centroid[1], f"C{i + 1}", fontsize=10, ha="center")
        plt.gca().set_aspect("equal", adjustable="box")
        plt.savefig(save_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, repeated calls accumulate open figures.
        plt.close(fig)

datasets/discrete_tokenizer.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import numpy as np
+import torch
+class DiscreteTokenizer(object):
+    def __init__(self, num_bins, seq_len, add_cls=False):
+        self.num_bins = num_bins
+        vocab_size = num_bins * num_bins
+        self.seq_len = seq_len
+        self.add_cls = add_cls
+        self.bos = vocab_size + 0
+        self.eos = vocab_size + 1
+        self.sep = vocab_size + 2
+        self.pad = vocab_size + 3
+        if add_cls:
+            self.cls = vocab_size + 4
+            self.vocab_size = vocab_size + 5
+        else:
+            self.vocab_size = vocab_size + 4
+    def __len__(self):
+        return self.vocab_size
+    def _padding(self, seq, pad_value, dtype):
+        if self.seq_len > len(seq):
+            seq.extend([pad_value] * (self.seq_len - len(seq)))
+        return torch.tensor(np.array(seq), dtype=dtype)
+    def __call__(self, seq, add_bos, add_eos, dtype, return_indices=False):
+        out = []
+        if add_bos:
+            out = [self.bos]
+        num_extra = 1 if not self.add_cls else 2  # cls and sep
+        indices = []
+        for i, sub in enumerate(seq):
+            cur_len = len(out)
+            # Append sub only if it doesn't exceed seq_len
+            if cur_len + len(sub) + num_extra <= self.seq_len:
+                out.extend(sub)
+                indices.append(i)
+            else:
+                continue
+            # Append cls and sep tokens only if it doesn't exceed seq_len
+            if self.add_cls:
+                out.append(self.cls)  # cls token
+            out.append(self.sep)
+        # Remove last separator token if present
+        if out and out[-1] == self.sep:
+            out.pop(-1)  # remove last separator token
+        if self.seq_len > len(out):
+            out.extend([self.pad] * (self.seq_len - len(out)))
+        if add_eos:
+            out[-1] = self.eos
+        if return_indices:
+            return torch.tensor(out, dtype=dtype), indices
+        return torch.tensor(out, dtype=dtype)

datasets/poly_data.py ADDED Viewed

	@@ -0,0 +1,590 @@

+import math
+import os
+from enum import Enum
+from pathlib import Path
+import numpy as np
+import torch
+import torch.utils.data
+import torchvision
+from PIL import Image
+from pycocotools.coco import COCO
+from torch.utils.data import Dataset
+from datasets.data_utils import sort_polygons
+from datasets.discrete_tokenizer import DiscreteTokenizer
+from datasets.transforms import ResizeAndPad
+from detectron2.data import transforms as T
+from detectron2.data.detection_utils import annotations_to_instances, transform_instance_annotations
+from detectron2.structures import BoxMode
+from util.poly_ops import resort_corners
class TokenType(Enum):
    """Kind of a sequence token: coordinate, separator, end-of-sequence or class."""

    coord = 0  # <coord>
    sep = 1  # <sep>
    eos = 2  # <eos>
    cls = 3  # <cls>
# Category ids that are filtered out as door/window annotations, per dataset name.
# An empty list means nothing is filtered for that dataset.
WD_INDEX = dict(
    stru3d=[16, 17],
    cubicasa=[9, 10],
    waffle=[],
    r2g=[],
)
class MultiPoly(Dataset):
    """COCO-annotated floor plan dataset yielding one prepared record per image."""

    def __init__(
        self,
        img_folder,
        ann_file,
        transforms,
        semantic_classes,
        dataset_name="",
        image_norm=False,
        poly2seq=False,
        converter_version="v1",
        random_drop_rate=0.0,
        **kwargs,
    ):
        """
        Args:
            img_folder: folder that image file names are resolved against.
            ann_file: path of the COCO annotation file.
            transforms: transforms forwarded to the record converter.
            semantic_classes: number of semantic classes; ``-1`` makes
                ``__getitem__`` drop door/window annotations for ``stru3d``
                and ``cubicasa``.
            dataset_name: name of the dataset.
            image_norm: forwarded to the record converter.
            poly2seq: forwarded to the record converter; also decides whether
                a tokenizer / vocabulary size is reported.
            converter_version: ``"v3_flipped"`` selects the ``"r2l"`` ordering,
                anything else ``"l2r"``.
            random_drop_rate: forwarded to the record converter.
            **kwargs: forwarded to the record converter.
        """
        super().__init__()
        self.root = img_folder
        self._transforms = transforms
        self.semantic_classes = semantic_classes
        self.dataset_name = dataset_name
        self.coco = COCO(ann_file)
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.poly2seq = poly2seq

        order_type = "r2l" if converter_version == "v3_flipped" else "l2r"
        self.prepare = ConvertToCocoDictWithOrder_plus(
            self.root,
            self._transforms,
            image_norm,
            poly2seq,
            semantic_classes=semantic_classes,
            order_type=order_type,
            random_drop_rate=random_drop_rate,
            **kwargs,
        )

    def get_image(self, path):
        """Open the image at ``path`` relative to the image folder."""
        return Image.open(os.path.join(self.root, path))

    def get_vocab_size(self):
        """Return the tokenizer's vocabulary size, or ``None`` without ``poly2seq``."""
        if not self.poly2seq:
            return None
        return len(self.prepare.tokenizer)

    def get_tokenizer(self):
        """Return the converter's tokenizer, or ``None`` without ``poly2seq``."""
        if not self.poly2seq:
            return None
        return self.prepare.tokenizer

    def __len__(self):
        """Return the number of images."""
        return len(self.ids)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            dict: COCO format dict
        """
        img_id = self.ids[index]
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        # Without semantic classes, door/window annotations are dropped for the
        # datasets that have them.
        if self.semantic_classes == -1 and self.dataset_name in ("stru3d", "cubicasa"):
            excluded = WD_INDEX[self.dataset_name]
            annotations = [ann for ann in annotations if ann["category_id"] not in excluded]

        file_name = self.coco.loadImgs(img_id)[0]["file_name"]
        return self.prepare(img_id, file_name, annotations)
class MultiPolyWD(MultiPoly):
    """Variant of ``MultiPoly`` that keeps only door/window annotations."""

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            dict: COCO format dict
        """
        # Door/window category ids per dataset; any other dataset name leaves
        # the annotation list unfiltered.
        wd_category_ids = {
            "stru3d": [16, 17],
            "rplan": [9, 11],
            "cubicasa": [9, 10],
        }

        img_id = self.ids[index]
        target = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        kept_ids = wd_category_ids.get(self.dataset_name)
        if kept_ids is not None:
            target = [ann for ann in target if ann["category_id"] in kept_ids]

        path = self.coco.loadImgs(img_id)[0]["file_name"]
        return self.prepare(img_id, path, target)
class ConvertToCocoDict(object):
    """Callable that converts one image and its COCO annotations into a record dict.

    The record holds the image tensor and the object returned by
    ``annotations_to_instances``. When ``poly2seq`` is enabled it additionally
    holds the token sequences and interpolation coefficients produced by
    ``_get_bilinear_interpolation_coeffs``.
    """

    def __init__(
        self,
        root,
        augmentations,
        image_norm,
        poly2seq=False,
        semantic_classes=-1,
        add_cls_token=False,
        per_token_class=False,
        mask_format="polygon",
        **kwargs,
    ):
        """
        Args:
            root: Directory that image paths are resolved against.
            augmentations: Callable applied to a ``T.AugInput``; None disables
                augmentation.
            image_norm: When truthy, images are normalized with fixed
                per-channel mean/std values.
            poly2seq: When truthy, a ``DiscreteTokenizer`` is built and
                polygons are converted to sequences.
            semantic_classes: Class count; ``semantic_classes - 1`` is used as
                the dummy/undefined class id.
            add_cls_token: Whether each polygon is followed by a <cls> token.
            per_token_class: Whether labels are emitted per token rather than
                per polygon.
            mask_format: Forwarded to ``annotations_to_instances``.
            **kwargs: Extra options forwarded to ``DiscreteTokenizer``.
        """
        self.root = root
        self.augmentations = augmentations
        if image_norm:
            self.image_normalize = torchvision.transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )
        else:
            self.image_normalize = None
        self.semantic_classes = semantic_classes
        self.poly2seq = poly2seq
        # Always defined so the attribute exists even when poly2seq is off.
        self.add_cls_token = add_cls_token
        if poly2seq:
            self.tokenizer = DiscreteTokenizer(add_cls=add_cls_token, **kwargs)
        self.per_token_class = per_token_class
        self.mask_format = mask_format

    def _expand_image_dims(self, x):
        """Return ``x`` in channel-first layout: (h, w) -> (1, h, w), (h, w, c) -> (c, h, w)."""
        if len(x.shape) == 2:
            exp_img = np.expand_dims(x, 0)
        else:
            exp_img = x.transpose((2, 0, 1))  # (h,w,c) -> (c,h,w)
        return exp_img

    @staticmethod
    def _prepare_image(img):
        """Drop any channel beyond the third and return ``(img, height, width)``.

        NumPy image arrays are laid out (rows, columns[, channels]), so the
        first axis is the height and the second is the width.
        """
        if img.ndim >= 3 and img.shape[-1] > 3:  # drop alpha channel
            img = img[:, :, :3]
        h, w = img.shape[:2]
        return img, h, w

    def __call__(self, img_id, path, target):
        """Build the record for one image.

        Args:
            img_id: Image id stored in the record.
            path: Image path relative to ``self.root``.
            target: List of COCO annotation dicts; each one gets a
                ``bbox_mode`` key added in place.
        Returns:
            dict: The record described in the class docstring.
        """
        file_name = os.path.join(self.root, path)
        # Read inside a context manager so the file handle is released promptly.
        with Image.open(file_name) as pil_image:
            img = np.array(pil_image)
        img, h, w = self._prepare_image(img)

        record = {}
        record["file_name"] = file_name
        record["height"] = h
        record["width"] = w
        record["image_id"] = img_id
        for obj in target:
            obj["bbox_mode"] = BoxMode.XYWH_ABS
        record["annotations"] = target

        if self.augmentations is None:
            record["image"] = (1 / 255) * torch.as_tensor(np.ascontiguousarray(self._expand_image_dims(img)))
            record["instances"] = annotations_to_instances(target, (h, w), mask_format=self.mask_format)
        else:
            aug_input = T.AugInput(img)
            transforms = self.augmentations(aug_input)
            image = aug_input.image
            record["image"] = (1 / 255) * torch.as_tensor(np.array(self._expand_image_dims(image)))
            h, w = image.shape[:2]  # update size
            annos = [
                transform_instance_annotations(obj, transforms, image.shape[:2])
                for obj in record.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            # resort corners after augmentation: so that all corners start from upper-left counterclockwise
            for anno in annos:
                anno["segmentation"][0] = resort_corners(anno["segmentation"][0])
            record["instances"] = annotations_to_instances(annos, (h, w), mask_format=self.mask_format)

        if self.image_normalize is not None:
            record["image"] = self.image_normalize(record["image"])

        # convert polygons to sequences
        if self.poly2seq:
            # only happens for the door/window-only setting
            if not hasattr(record["instances"], "gt_masks"):
                polygons = [np.array([[0.0, 0.0]])]
                polygons_label = [self.semantic_classes - 1]  # dummy class
            else:
                # NOTE(review): both axes are scaled by (w - 1), which presumably
                # assumes square images - confirm against the data pipeline.
                polygons = [
                    np.clip(np.array(inst).reshape(-1, 2) / (w - 1), 0, 1)
                    for inst in record["instances"].gt_masks.polygons
                ]
                polygons_label = [inst.item() for inst in record["instances"].gt_classes]
            record.update(
                self._get_bilinear_interpolation_coeffs(
                    polygons, polygons_label, self.add_cls_token, self.per_token_class
                )
            )
        return record

    def _fractional_offsets(self, kept_quant_poly, axis, add_cls_token):
        """Return padded fractional parts of quantized coordinates along ``axis``.

        The list starts with a 0 for the <bos> token and carries a 0 for every
        <cls>/<sep> slot; the trailing separator slot is dropped.
        """
        offsets = [0]  # [0] for bos token
        for polygon in kept_quant_poly:
            offsets.extend(point[axis] - math.floor(point[axis]) for point in polygon)
            if add_cls_token:
                offsets.append(0)  # for cls token
            offsets.append(0)  # for separator token
        offsets = offsets[:-1]  # there is no separator token in the end
        return self.tokenizer._padding(offsets, 0, dtype=torch.float32)

    def _get_bilinear_interpolation_coeffs(self, polygons, polygons_label, add_cls_token=False, per_token_class=False):
        """Tokenize polygons and compute their bilinear-interpolation coefficients.

        Args:
            polygons: List of (n, 2) arrays of coordinates normalized to [0, 1].
            polygons_label: Class id of every polygon.
            add_cls_token: Whether a <cls> slot follows each polygon.
            per_token_class: Emit one label per token instead of per polygon.
        Returns:
            dict: Four corner token sequences (``seq11`` .. ``seq22``), the
            fractional offsets (``delta_*``), the real-valued ``target_seq``,
            ``token_labels``, the validity ``mask`` and
            ``target_polygon_labels``.
        """
        num_bins = self.tokenizer.num_bins
        seq_len = self.tokenizer.seq_len
        quant_poly = [poly * (num_bins - 1) for poly in polygons]
        # Flattened bin index of the four grid corners around every point.
        index11 = [[math.floor(p[0]) * num_bins + math.floor(p[1]) for p in poly] for poly in quant_poly]
        index21 = [[math.ceil(p[0]) * num_bins + math.floor(p[1]) for p in poly] for poly in quant_poly]
        index12 = [[math.floor(p[0]) * num_bins + math.ceil(p[1]) for p in poly] for poly in quant_poly]
        index22 = [[math.ceil(p[0]) * num_bins + math.ceil(p[1]) for p in poly] for poly in quant_poly]
        seq11 = self.tokenizer(index11, add_bos=True, add_eos=False, dtype=torch.long)
        seq21 = self.tokenizer(index21, add_bos=True, add_eos=False, dtype=torch.long)
        seq12 = self.tokenizer(index12, add_bos=True, add_eos=False, dtype=torch.long)
        seq22 = self.tokenizer(index22, add_bos=True, add_eos=False, dtype=torch.long)

        # Real-valued targets and the token type of every position.
        target_seq = []
        token_labels = []  # 0 for <coord>, 1 for <sep>, 2 for <eos>, 3 for <cls>
        num_extra = 2 if add_cls_token else 1  # cls and sep
        count_polys = 0
        for poly in polygons:
            # Stop at the first polygon that no longer fits in the sequence.
            if len(token_labels) + len(poly) + num_extra > seq_len:
                break
            token_labels.extend([TokenType.coord.value] * len(poly))
            if add_cls_token:
                token_labels.append(TokenType.cls.value)  # cls token
            token_labels.append(TokenType.sep.value)  # separator token
            target_seq.extend(poly)
            if add_cls_token:
                target_seq.append([0, 0])  # padding for cls token
            target_seq.append([0, 0])  # padding for sep/end token
            count_polys += 1

        # The last separator becomes the end-of-sequence token.
        if token_labels:
            token_labels[-1] = TokenType.eos.value
        mask = torch.ones(seq_len, dtype=torch.bool)
        if len(token_labels) < seq_len:
            mask[len(token_labels) :] = 0
        target_seq = self.tokenizer._padding(target_seq, [0, 0], dtype=torch.float32)
        token_labels = self.tokenizer._padding(token_labels, -1, dtype=torch.long)

        kept_quant_poly = quant_poly[:count_polys]
        delta_x1 = self._fractional_offsets(kept_quant_poly, 0, add_cls_token)
        delta_x2 = 1 - delta_x1
        delta_y1 = self._fractional_offsets(kept_quant_poly, 1, add_cls_token)
        delta_y2 = 1 - delta_y1

        if not per_token_class:
            target_polygon_labels = list(polygons_label[:count_polys])
        else:
            target_polygon_labels = []
            for poly, poly_label in zip(kept_quant_poly, polygons_label[:count_polys]):
                target_polygon_labels.extend([poly_label] * len(poly))
                target_polygon_labels.append(self.semantic_classes - 1)  # undefined class for <sep> and <eos> token

        # Pad with -1 up to the sequence length. The amount is derived from the
        # list being padded (a non-positive amount adds nothing).
        target_polygon_labels.extend([-1] * (seq_len - len(target_polygon_labels)))
        target_polygon_labels = torch.tensor(target_polygon_labels, dtype=torch.long)

        return {
            "delta_x1": delta_x1,
            "delta_x2": delta_x2,
            "delta_y1": delta_y1,
            "delta_y2": delta_y2,
            "seq11": seq11,
            "seq21": seq21,
            "seq12": seq12,
            "seq22": seq22,
            "target_seq": target_seq,
            "token_labels": token_labels,
            "mask": mask,
            "target_polygon_labels": target_polygon_labels,
        }
class ConvertToCocoDictWithOrder_plus(ConvertToCocoDict):
    """Converter that orders polygons before tokenizing them.

    Room polygons are placed ahead of door/window polygons (as listed in
    ``WD_INDEX`` for ``dataset_name``), each group is ordered by
    ``sort_polygons``, and polygons can be randomly dropped.
    """

    def __init__(
        self,
        root,
        augmentations,
        image_norm,
        poly2seq=False,
        semantic_classes=-1,
        add_cls_token=False,
        per_token_class=False,
        mask_format="polygon",
        dataset_name="stru3d",
        order_type="l2r",
        random_drop_rate=0.0,
        **kwargs,
    ):
        """
        Args:
            root, augmentations, image_norm, poly2seq, semantic_classes,
            add_cls_token, per_token_class, mask_format: Forwarded to
                ``ConvertToCocoDict``.
            dataset_name: Key into ``WD_INDEX`` selecting door/window ids.
            order_type: "l2r" or "r2l"; "r2l" reverses the polygon sort.
            random_drop_rate: Per-polygon drop probability; 0 disables dropping.
            **kwargs: Extra options forwarded to ``DiscreteTokenizer``.
        """
        super().__init__(
            root,
            augmentations,
            image_norm,
            poly2seq,
            semantic_classes,
            add_cls_token,
            per_token_class,
            mask_format,
            **kwargs,
        )
        self.dataset_name = dataset_name
        self.order_type = order_type  # l2r, r2l
        self.random_drop_rate = random_drop_rate
        # Built unconditionally, so a tokenizer exists even when poly2seq is off.
        self.tokenizer = DiscreteTokenizer(add_cls=add_cls_token, **kwargs)

    def _get_bilinear_interpolation_coeffs(self, polygons, polygons_label, add_cls_token=False, per_token_class=False):
        """Order, optionally drop, and tokenize polygons.

        Args:
            polygons: List of (n, 2) arrays of coordinates normalized to [0, 1].
            polygons_label: Class id of every polygon.
            add_cls_token: Whether a <cls> slot follows each polygon.
            per_token_class: Emit one label per token instead of per polygon.
        Returns:
            dict: Same keys as the base-class result plus
            ``input_polygon_labels``.
        Raises:
            KeyError: If ``self.dataset_name`` is not a key of ``WD_INDEX``.
        """
        num_bins = self.tokenizer.num_bins
        seq_len = self.tokenizer.seq_len
        wd_labels = WD_INDEX[self.dataset_name]

        # Split polygon indices into rooms and doors/windows, then sort each group.
        room_indices = [idx for idx, label in enumerate(polygons_label) if label not in wd_labels]
        wd_indices = [idx for idx, label in enumerate(polygons_label) if label in wd_labels]
        reverse_order = self.order_type == "r2l"
        _, room_sorted_indices = sort_polygons([polygons[idx] for idx in room_indices], reverse=reverse_order)
        _, wd_sorted_indices = sort_polygons([polygons[idx] for idx in wd_indices], reverse=reverse_order)
        room_indices = [room_indices[_idx] for _idx in room_sorted_indices]
        wd_indices = [wd_indices[_idx] for _idx in wd_sorted_indices]

        combined_indices = room_indices + wd_indices  # room first
        if self.random_drop_rate > 0 and len(combined_indices) > 2:
            keep_indices = np.where(np.random.rand(len(combined_indices)) >= self.random_drop_rate)[0].tolist()
            if len(keep_indices) > 0:  # Only apply drop if we have something left
                combined_indices = [combined_indices[i] for i in keep_indices]

        polygons = [polygons[i] for i in combined_indices]
        polygons_label = [polygons_label[i] for i in combined_indices]

        quant_poly = [poly * (num_bins - 1) for poly in polygons]
        # Flattened bin index of the four grid corners around every point.
        index11 = [[math.floor(p[0]) * num_bins + math.floor(p[1]) for p in poly] for poly in quant_poly]
        index21 = [[math.ceil(p[0]) * num_bins + math.floor(p[1]) for p in poly] for poly in quant_poly]
        index12 = [[math.floor(p[0]) * num_bins + math.ceil(p[1]) for p in poly] for poly in quant_poly]
        index22 = [[math.ceil(p[0]) * num_bins + math.ceil(p[1]) for p in poly] for poly in quant_poly]
        seq11 = self.tokenizer(index11, add_bos=True, add_eos=False, dtype=torch.long)
        seq21 = self.tokenizer(index21, add_bos=True, add_eos=False, dtype=torch.long)
        seq12 = self.tokenizer(index12, add_bos=True, add_eos=False, dtype=torch.long)
        # poly_indices: indices (into the reordered lists) of the polygons the
        # tokenizer reports back; every later step iterates only over those.
        seq22, poly_indices = self.tokenizer(
            index22, add_bos=True, add_eos=False, dtype=torch.long, return_indices=True
        )

        # Real-valued targets and the token type of every position.
        target_seq = []
        token_labels = []  # 0 for <coord>, 1 for <sep>, 2 for <eos>, 3 for <cls>
        for i in poly_indices:
            token_labels.extend([TokenType.coord.value] * len(polygons[i]))
            if add_cls_token:
                token_labels.append(TokenType.cls.value)  # cls token
            token_labels.append(TokenType.sep.value)  # separator token
            target_seq.extend(polygons[i])
            if add_cls_token:
                target_seq.append([0, 0])  # padding for cls token
            target_seq.append([0, 0])  # padding for sep/end token

        # The last separator becomes the end-of-sequence token. Guarded so an
        # empty selection does not raise IndexError (the base class has the
        # same guard).
        if token_labels:
            token_labels[-1] = TokenType.eos.value
        mask = torch.ones(seq_len, dtype=torch.bool)
        if len(token_labels) < seq_len:
            mask[len(token_labels) :] = 0
        target_seq = self.tokenizer._padding(target_seq, [0, 0], dtype=torch.float32)
        token_labels = self.tokenizer._padding(token_labels, -1, dtype=torch.long)

        def fractional_offsets(axis):
            """Padded fractional parts of the kept quantized coordinates along ``axis``."""
            offsets = [0]  # [0] for bos token
            for i in poly_indices:
                offsets.extend(point[axis] - math.floor(point[axis]) for point in quant_poly[i])
                if add_cls_token:
                    offsets.append(0)  # for cls token
                offsets.append(0)  # for separator token
            offsets = offsets[:-1]  # there is no separator token in the end
            return self.tokenizer._padding(offsets, 0, dtype=torch.float32)

        delta_x1 = fractional_offsets(0)
        delta_x2 = 1 - delta_x1
        delta_y1 = fractional_offsets(1)
        delta_y2 = 1 - delta_y1

        if not per_token_class:
            target_polygon_labels = [polygons_label[i] for i in poly_indices]
            input_polygon_labels = torch.tensor(target_polygon_labels.copy(), dtype=torch.long)
        else:
            target_polygon_labels = []
            for i in poly_indices:
                poly, poly_label = quant_poly[i], polygons_label[i]
                target_polygon_labels.extend([poly_label] * len(poly))
                target_polygon_labels.append(self.semantic_classes - 1)  # undefined class for <sep> and <eos> token
            input_polygon_labels = torch.tensor(
                [self.semantic_classes - 1] + target_polygon_labels.copy()[:-1], dtype=torch.long
            )  # right shift by one: <bos>, ..., <coord>

        # Pad with -1 up to the sequence length. The amount is derived from the
        # list being padded (a non-positive amount adds nothing).
        target_polygon_labels.extend([-1] * (seq_len - len(target_polygon_labels)))
        target_polygon_labels = torch.tensor(target_polygon_labels, dtype=torch.long)

        return {
            "delta_x1": delta_x1,
            "delta_x2": delta_x2,
            "delta_y1": delta_y1,
            "delta_y2": delta_y2,
            "seq11": seq11,
            "seq21": seq21,
            "seq12": seq12,
            "seq22": seq22,
            "target_seq": target_seq,
            "token_labels": token_labels,
            "mask": mask,
            "target_polygon_labels": target_polygon_labels,
            "input_polygon_labels": input_polygon_labels,
        }
def make_poly_transforms(dataset_name, image_set, image_size=256, disable_image_transform=False):
    """Build the augmentation pipeline for a dataset split.

    Args:
        dataset_name: "cubicasa" and "waffle" always get a resize-and-pad step;
            "r2g" gets it unless ``image_size`` is 512.
        image_set: "train", "val" or "test".
        image_size: Side length of the square resize-and-pad target.
        disable_image_transform: Skip the random flips/rotation for "train".
    Returns:
        A ``T.AugmentationList``; for "val"/"test" with no steps, None.
    Raises:
        ValueError: If ``image_set`` is not one of the known splits.
    """
    needs_resize = dataset_name in ("cubicasa", "waffle") or (dataset_name == "r2g" and image_size != 512)
    steps = [ResizeAndPad((image_size, image_size), pad_value=255)] if needs_resize else []

    if image_set == "train":
        if not disable_image_transform:
            steps += [
                T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
                T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
                T.RandomRotation([0.0, 90.0, 180.0, 270.0], expand=False, center=None, sample_style="choice"),
            ]
        return T.AugmentationList(steps)

    if image_set in ("val", "test"):
        return T.AugmentationList(steps) if steps else None

    raise ValueError(f"unknown {image_set}")
def build(image_set, args):
    """Create the dataset for one split from parsed arguments.

    Args:
        image_set: "train", "val" or "test"; selects the image folder and the
            annotation file under ``args.dataset_root``.
        args: Namespace carrying the dataset options read below.
    Returns:
        ``MultiPolyWD`` when ``args.wd_only`` is truthy, else ``MultiPoly``.
    """
    root = Path(args.dataset_root)
    assert root.exists(), f"provided data path {root} does not exist"

    # <root>/<split> holds the images, <root>/annotations/<split>.json the labels.
    split_paths = {
        split: (root / split, root / "annotations" / f"{split}.json") for split in ("train", "val", "test")
    }
    img_folder, ann_file = split_paths[image_set]

    image_transform = make_poly_transforms(
        args.dataset_name,
        image_set,
        image_size=args.image_size,
        disable_image_transform=getattr(args, "disable_image_transform", False),
    )

    wd_only = args.wd_only
    # Options shared by both dataset flavours.
    shared_options = dict(
        transforms=image_transform,
        semantic_classes=args.semantic_classes,
        dataset_name=args.dataset_name,
        image_norm=args.image_norm,
        poly2seq=args.poly2seq,
        num_bins=args.num_bins,
        seq_len=args.seq_len,
        add_cls_token=args.add_cls_token,
        per_token_class=args.per_token_sem_loss,
        mask_format=getattr(args, "mask_format", "polygon"),
    )

    if wd_only:
        return MultiPolyWD(img_folder, ann_file, **shared_options)

    return MultiPoly(
        img_folder,
        ann_file,
        converter_version=getattr(args, "converter_version", "v1"),
        random_drop_rate=getattr(args, "random_drop_rate", 0.0),
        **shared_options,
    )

datasets/room_dropout.py ADDED Viewed

	@@ -0,0 +1,237 @@

+import random
+from typing import List, Optional, Tuple
+import cv2
+import numpy as np
+from skimage.draw import polygon
class RoomDropoutStrategy:
    """
    Strategy for randomly dropping rooms from a density map using ground truth coordinates.
    Density map: grayscale image where foreground (rooms) are white points and background is black
    GT room coordinates: list of 2D points defining each room's boundary
    """

    def __init__(self, density_map: np.ndarray, room_coordinates: List[List[Tuple[int, int]]]):
        """
        Initialize the dropout strategy.
        Args:
            density_map: Grayscale image (H, W) where white pixels represent rooms
            room_coordinates: List of rooms, each room is a list of (x, y) coordinate tuples
        """
        # Keep a private copy so the caller's array is never modified.
        self.original_density_map = density_map.copy()
        self.room_coordinates = room_coordinates
        self.num_rooms = len(room_coordinates)

    def _room_mask(self, room_coords, shape) -> np.ndarray:
        """Rasterize one room outline into a uint8 mask of the given (h, w) shape."""
        h, w = shape
        mask = np.zeros((h, w), dtype=np.uint8)
        if len(room_coords) >= 3:  # Need at least 3 points for a polygon
            coords = np.array(room_coords)
            # skimage expects (row, column) order, i.e. (y, x).
            rr, cc = polygon(coords[:, 1], coords[:, 0], shape=(h, w))
            mask[rr, cc] = 1
        return mask

    def _erase_masks(self, masks) -> np.ndarray:
        """Return a copy of the density map with every masked pixel set to 0."""
        modified_map = self.original_density_map.copy()
        for mask in masks:
            # Set pixels in the room area to background (black/0)
            modified_map[mask == 1] = 0
        return modified_map

    def create_room_masks(self) -> List[np.ndarray]:
        """
        Create binary masks for each room using their GT coordinates.
        Returns:
            List of binary masks, one for each room
        """
        h, w = self.original_density_map.shape
        return [self._room_mask(room_coords, (h, w)) for room_coords in self.room_coordinates]

    def drop_rooms_random(self, dropout_rate: float = 0.3, seed: Optional[int] = None) -> Tuple[np.ndarray, List[int]]:
        """
        Randomly drop rooms from the density map.
        Args:
            dropout_rate: Fraction of rooms to drop (0.0 to 1.0)
            seed: Random seed for reproducibility. A dedicated generator is
                seeded, so the process-wide random/NumPy generators are left
                untouched; the selection for a given seed is unchanged.
        Returns:
            Tuple of (modified_density_map, list_of_dropped_room_indices)
        """
        rng = random.Random(seed) if seed is not None else random
        # Determine number of rooms to drop
        num_to_drop = int(self.num_rooms * dropout_rate)
        # Randomly select room indices to drop
        dropped_indices = rng.sample(list(range(self.num_rooms)), num_to_drop)
        return self._apply_dropout(dropped_indices), dropped_indices

    def drop_rooms_by_indices(self, room_indices: List[int]) -> np.ndarray:
        """
        Drop specific rooms by their indices.
        Args:
            room_indices: List of room indices to drop
        Returns:
            Modified density map with specified rooms removed
        """
        return self._apply_dropout(room_indices)

    def drop_rooms_by_area(
        self, min_area: Optional[int] = None, max_area: Optional[int] = None
    ) -> Tuple[np.ndarray, List[int]]:
        """
        Drop rooms based on their area constraints.
        Args:
            min_area: Minimum area threshold (drop rooms smaller than this)
            max_area: Maximum area threshold (drop rooms larger than this)
        Returns:
            Tuple of (modified_density_map, list_of_dropped_room_indices)
        """
        room_masks = self.create_room_masks()
        dropped_indices = []
        for i, mask in enumerate(room_masks):
            area = np.sum(mask)
            too_small = min_area is not None and area < min_area
            too_large = max_area is not None and area > max_area
            if too_small or too_large:
                dropped_indices.append(i)
        # Reuse the masks computed above instead of rasterizing them again.
        return self._erase_masks(room_masks[i] for i in dropped_indices), dropped_indices

    def _apply_dropout(self, room_indices_to_drop: List[int]) -> np.ndarray:
        """
        Apply dropout by removing specified rooms from the density map.
        Args:
            room_indices_to_drop: List of room indices to remove; indices
                outside the valid range are ignored
        Returns:
            Modified density map with rooms removed
        """
        h, w = self.original_density_map.shape
        num_rooms = len(self.room_coordinates)
        # Only the rooms that are actually dropped need to be rasterized.
        masks = (
            self._room_mask(self.room_coordinates[room_idx], (h, w))
            for room_idx in room_indices_to_drop
            if 0 <= room_idx < num_rooms
        )
        return self._erase_masks(masks)

    def visualize_dropout(
        self, original_map: np.ndarray, modified_map: np.ndarray, dropped_indices: List[int]
    ) -> np.ndarray:
        """
        Create a visualization showing the dropout effect.
        Args:
            original_map: Original density map
            modified_map: Modified density map after dropout
            dropped_indices: Indices of dropped rooms
        Returns:
            Visualization image with original and modified maps side by side
        """
        h, w = original_map.shape
        # Create side-by-side comparison
        vis = np.zeros((h, w * 2), dtype=np.uint8)
        vis[:, :w] = original_map
        vis[:, w:] = modified_map
        vis_color = cv2.cvtColor(vis, cv2.COLOR_GRAY2BGR)

        # Highlight dropped rooms in red on the left (original) half only.
        # The room masks are (h, w), so they must index the (h, w, 3) left
        # half; indexing the full (h, 2w, 3) canvas raises IndexError.
        left_half = vis_color[:, :w]  # view: writes go through to vis_color
        mask_shape = self.original_density_map.shape
        num_rooms = len(self.room_coordinates)
        for idx in dropped_indices:
            if 0 <= idx < num_rooms:
                mask = self._room_mask(self.room_coordinates[idx], mask_shape)
                left_half[mask == 1] = (0, 0, 255)  # (blue, green, red)
        return vis_color
# Example usage and testing
def example_usage():
    """
    Demonstrate RoomDropoutStrategy on a synthetic 200x200 density map.

    Returns the original map followed by the three modified maps.
    """
    # Five sample rooms as (x, y) outlines.
    room_coordinates = [
        [(20, 20), (80, 20), (80, 60), (20, 60)],  # Room 1: rectangle
        [(100, 30), (180, 30), (180, 80), (100, 80)],  # Room 2: another rectangle
        [(30, 100), (90, 100), (90, 130), (60, 130), (60, 160), (30, 160)],  # Room 3: L-shaped room
        [(120, 120), (160, 120), (140, 160)],  # Room 4: triangle
        [(50, 180), (70, 170), (90, 180), (80, 195), (40, 195)],  # Room 5: pentagon
    ]

    # Paint every room white on a black canvas.
    density_map = np.zeros((200, 200), dtype=np.uint8)
    for outline in room_coordinates:
        points = np.array(outline)
        rr, cc = polygon(points[:, 1], points[:, 0], shape=density_map.shape)
        density_map[rr, cc] = 255

    dropout_strategy = RoomDropoutStrategy(density_map, room_coordinates)

    # Example 1: Random dropout
    print("Example 1: Random dropout (30% of rooms)")
    modified_map1, dropped_indices1 = dropout_strategy.drop_rooms_random(dropout_rate=0.3, seed=42)
    print(f"Dropped rooms: {dropped_indices1}")

    # Example 2: Drop specific rooms
    print("\nExample 2: Drop specific rooms (indices 0 and 2)")
    modified_map2 = dropout_strategy.drop_rooms_by_indices([0, 2])

    # Example 3: Drop rooms by area
    print("\nExample 3: Drop rooms with area > 3000 pixels")
    modified_map3, dropped_indices3 = dropout_strategy.drop_rooms_by_area(max_area=3000)
    print(f"Dropped rooms by area: {dropped_indices3}")

    return density_map, modified_map1, modified_map2, modified_map3


if __name__ == "__main__":
    example_usage()

datasets/transforms.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from PIL import Image
+from detectron2.data import transforms as T
class Resize(T.Augmentation):
    """Resize image to a fixed target size"""

    def __init__(self, shape, interp=Image.BICUBIC):
        """
        Args:
            shape: (h, w) tuple or a int
            interp: PIL interpolation method
        """
        if isinstance(shape, int):
            shape = (shape, shape)  # a single int means a square target
        shape = tuple(shape)
        # Hands the local names to the base-class initializer; presumably this
        # stores `shape` and `interp` as attributes (get_transform reads both)
        # - TODO confirm. Do not introduce extra locals above this call.
        self._init(locals())

    def get_transform(self, image):
        """Return the transform from the input's (height, width) to the fixed target size."""
        return T.ResizeTransform(image.shape[0], image.shape[1], self.shape[0], self.shape[1], self.interp)
# Custom transform that resizes and then pads to fixed size
class ResizeAndPad(T.Augmentation):
    """Resize an image preserving its aspect ratio, then pad it to a fixed size.

    The padding is split between both sides of each axis; when the amount is
    odd, the extra pixel goes to the bottom/right.
    """

    def __init__(self, target_size, pad_value=0, interp=Image.BICUBIC):
        """
        Args:
            target_size: (height, width) of the output
            pad_value: Value used for the padded border
            interp: PIL interpolation method
        """
        super().__init__()
        self.target_size = target_size  # (height, width)
        self.interp = interp
        self.pad_value = pad_value

    def get_transform(self, img):
        """Return the resize-then-pad transform list for ``img``."""
        h, w = img.shape[:2]
        target_h, target_w = self.target_size[0], self.target_size[1]
        scale = min(target_h / h, target_w / w)
        # Clamp to one pixel so extreme aspect ratios never produce an empty side.
        new_h, new_w = max(1, int(h * scale)), max(1, int(w * scale))

        # First resize preserving aspect ratio
        resize_t = T.ResizeTransform(h, w, new_h, new_w, self.interp)

        # Then pad to target size
        pad_h, pad_w = target_h - new_h, target_w - new_w
        top = pad_h // 2
        left = pad_w // 2
        # orig_w/orig_h describe the image before padding; passing them by
        # keyword keeps width and height in the right slots.
        pad_t = T.PadTransform(
            left,
            top,
            pad_w - left,
            pad_h - top,
            orig_w=new_w,
            orig_h=new_h,
            pad_value=self.pad_value,
        )
        return T.TransformList([resize_t, pad_t])

detectron2/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

# Copyright (c) Facebook, Inc. and its affiliates.
from .utils.env import setup_environment

# Run the package's environment setup as soon as the package is imported.
setup_environment()

# This line will be programmatically read/written by setup.py.
# Leave it at the bottom of this file and don't touch it.
__version__ = "0.6"

detectron2/checkpoint/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# File:
from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
from . import catalog as _UNUSED  # register the handler
from .detection_checkpoint import DetectionCheckpointer

# Names exported by `from <this package> import *`.
__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]

detectron2/checkpoint/c2_model_loading.py ADDED Viewed

	@@ -0,0 +1,387 @@

+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import logging
+import re
+from typing import Dict, List
+import torch
+from tabulate import tabulate
def convert_basic_c2_names(original_keys):
    """
    Translate Caffe2 blob names of typical backbone models into detectron2-style names.

    Every key goes through the same ordered chain of rewrites: two hard-coded
    whole-key renames, "_" -> "." conversion, weight/bias and normalization
    suffix expansion, stem and residual-branch renaming, and finally the
    DensePose-specific substitutions.

    Args:
        original_keys (list[str]): names as they appear in the C2 weights.

    Returns:
        list[str]: the converted names, one per input key and in the same order.
    """
    # whole-key renames that must happen before "_" is turned into "."
    hard_coded = {"pred_b": "linear_b", "pred_w": "linear_w"}

    # regex rewrites, applied in this exact order once "_" has become "."
    regex_rules = (
        (r"\.b$", ".bias"),
        (r"\.w$", ".weight"),
        # Uniform both bn and gn names to "norm"
        (r"bn\.s$", "norm.weight"),
        (r"bn\.bias$", "norm.bias"),
        (r"bn\.rm", "norm.running_mean"),
        (r"bn\.running.mean$", "norm.running_mean"),
        (r"bn\.riv$", "norm.running_var"),
        (r"bn\.running.var$", "norm.running_var"),
        (r"bn\.gamma$", "norm.weight"),
        (r"bn\.beta$", "norm.bias"),
        (r"gn\.s$", "norm.weight"),
        (r"gn\.bias$", "norm.bias"),
        # stem
        (r"^res\.conv1\.norm\.", "conv1.norm."),
        # to avoid mis-matching with "conv1" in other components (e.g. detection head)
        (r"^conv1\.", "stem.conv1."),
    )

    # residual blocks; stage names keep the C2 strategy (res2-5) rather than
    # torchvision's layer1-4
    block_rules = (
        (".branch1.", ".shortcut."),
        (".branch2a.", ".conv1."),
        (".branch2b.", ".conv2."),
        (".branch2c.", ".conv3."),
    )

    # DensePose substitutions that are plain substring replacements
    densepose_rules = (
        ("AnnIndex.lowres", "ann_index_lowres"),
        ("Index.UV.lowres", "index_uv_lowres"),
        ("U.lowres", "u_lowres"),
        ("V.lowres", "v_lowres"),
    )

    def _rename(key):
        key = hard_coded.get(key, key)
        key = key.replace("_", ".")
        for pattern, replacement in regex_rules:
            key = re.sub(pattern, replacement, key)
        for old, new in block_rules:
            key = key.replace(old, new)
        # first DensePose rule is a regex anchored at the start of the name
        key = re.sub("^body.conv.fcn", "body_conv_fcn", key)
        for old, new in densepose_rules:
            key = key.replace(old, new)
        return key

    return [_rename(key) for key in copy.deepcopy(original_keys)]
def convert_c2_detectron_names(weights):
    """
    Map Caffe2 Detectron weight names to Detectron2 names.

    On top of the backbone renaming done by ``convert_basic_c2_names`` this
    renames the RPN, Fast R-CNN box head, FPN, mask head and keypoint head
    blobs. The background-class entries are also rearranged: they are dropped
    from ``bbox_pred`` / ``mask_head.predictor`` and moved from index 0 to the
    last index of ``cls_score``.

    Args:
        weights (dict): name -> tensor

    Returns:
        dict: detectron2 names -> tensor
        dict: detectron2 names -> C2 names
    """
    logger = logging.getLogger(__name__)
    logger.info("Renaming Caffe2 weights ......")
    original_keys = sorted(weights.keys())
    basic_keys = convert_basic_c2_names(copy.deepcopy(original_keys))

    # RPN hidden conv and box-transformation conv. In the C2 model the RPN
    # layers are defined for FPN level 2 and then shared by all other levels,
    # hence "fpn2"; each FPN rule has to run before its non-FPN counterpart.
    rpn_rules = (
        ("conv.rpn.fpn2", "proposal_generator.rpn_head.conv"),
        ("conv.rpn", "proposal_generator.rpn_head.conv"),
        ("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas"),
        ("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits"),
        ("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas"),
        ("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits"),
    )
    # Fast R-CNN box head (regexes anchored at the start of the name).
    # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
    box_head_rules = (
        (r"^bbox\.pred", "bbox_pred"),
        (r"^cls\.score", "cls_score"),
        (r"^fc6\.", "box_head.fc1."),
        (r"^fc7\.", "box_head.fc2."),
        (r"^head\.conv", "box_head.conv"),
    )
    # Keypoint R-CNN head; interestingly, the keypoint head convs have blob
    # names that are simply "conv_fcnX"
    keypoint_rules = (
        ("conv.fcn", "roi_heads.keypoint_head.conv_fcn"),
        ("kps.score.lowres", "roi_heads.keypoint_head.score_lowres"),
        ("kps.score.", "roi_heads.keypoint_head.score."),
    )

    def _fpn_name(name):
        """
        Rename FPN convolutions and leave every other name untouched.

        1) "fpn.inner.*", e.g. "fpn.inner.res2.2.sum.lateral.weight":
           lateral pathway convolutions
        2) "fpn.res*", e.g. "fpn.res2.2.sum.weight":
           FPN output convolutions
        """
        parts = name.split(".")
        norm = ".norm" if "norm" in parts else ""
        if name.startswith("fpn.inner."):
            # parts example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
            template, stage_token = "fpn_lateral{}{}.{}", parts[2]
        elif name.startswith("fpn.res"):
            # parts example: ['fpn', 'res2', '2', 'sum', 'weight']
            template, stage_token = "fpn_output{}{}.{}", parts[1]
        else:
            return name
        stage = int(stage_token[len("res") :])
        return template.format(stage, norm, parts[-1])

    def _rename(key):
        for old, new in rpn_rules:
            key = key.replace(old, new)
        for pattern, replacement in box_head_rules:
            key = re.sub(pattern, replacement, key)
        key = _fpn_name(key)
        # Mask R-CNN mask head: roi_heads.StandardROIHeads case
        key = key.replace(".[mask].fcn", "mask_head.mask_fcn")
        key = re.sub(r"^\.mask\.fcn", "mask_head.mask_fcn", key)
        key = key.replace("mask.fcn.logits", "mask_head.predictor")
        # Mask R-CNN mask head: roi_heads.Res5ROIHeads case
        key = key.replace("conv5.mask", "mask_head.deconv")
        for old, new in keypoint_rules:
            key = key.replace(old, new)
        return key

    layer_keys = [_rename(key) for key in basic_keys]

    # renaming must neither merge two names into one nor lose any
    assert len(set(layer_keys)) == len(layer_keys)
    assert len(original_keys) == len(layer_keys)

    new_weights = {}
    new_keys_to_original_keys = {}
    for orig, renamed in zip(original_keys, layer_keys):
        new_keys_to_original_keys[renamed] = orig
        tensor = weights[orig]
        is_bbox_pred = renamed.startswith("bbox_pred.")
        if is_bbox_pred or renamed.startswith("mask_head.predictor."):
            # remove the meaningless prediction weight for background class:
            # the leading 4 entries for bbox_pred, the leading 1 for the mask predictor
            first_kept = 4 if is_bbox_pred else 1
            trimmed = tensor[first_kept:]
            new_weights[renamed] = trimmed
            logger.info(
                "Remove prediction weight for background class in {}. The shape changes from "
                "{} to {}.".format(renamed, tuple(tensor.shape), tuple(trimmed.shape))
            )
        elif renamed.startswith("cls_score."):
            # move weights of bg class from original index 0 to last index
            logger.info(
                "Move classification weights for background class in {} from index 0 to "
                "index {}.".format(renamed, tensor.shape[0] - 1)
            )
            new_weights[renamed] = torch.cat([tensor[1:], tensor[:1]])
        else:
            new_weights[renamed] = tensor
    return new_weights, new_keys_to_original_keys
+# Note the current matching is not symmetric.
+# it assumes model_state_dict will have longer names.
def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True):
    """
    Match names between the two state-dict, and returns a new chkpt_state_dict with names
    converted to match model_state_dict with heuristics. The returned dict can be later
    loaded with fvcore checkpointer.
    If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2
    model and will be renamed at first.

    Strategy: suppose that the models that we will create will have prefixes appended
    to each of its keys, for example due to an extra level of nesting that the original
    pre-trained weights from ImageNet won't contain. For example, model.state_dict()
    might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
    res2.conv1.weight. We thus want to match both parameters together.
    For that, we look for each model weight, look among all loaded keys if there is one
    that is a suffix of the current weight name, and use it if that's the case.
    If multiple matches exist, take the one with longest size
    of the corresponding name. For example, for the same model as before, the pretrained
    weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
    we want to match backbone[0].body.conv1.weight to conv1.weight, and
    backbone[0].body.res2.conv1.weight to res2.conv1.weight.

    Args:
        model_state_dict (dict): model parameter name -> tensor.
        ckpt_state_dict (dict): checkpoint parameter name -> tensor.
        c2_conversion (bool): rename the checkpoint with
            ``convert_c2_detectron_names`` before matching.

    Returns:
        dict: matched checkpoint tensors stored under their model names, plus every
        unmatched checkpoint entry under its (possibly renamed) checkpoint name.
        If nothing can be matched, the (possibly renamed) checkpoint dict is
        returned as is.

    Raises:
        ValueError: if one checkpoint key is the best match of several model keys.
    """
    logger = logging.getLogger(__name__)
    model_keys = sorted(model_state_dict.keys())
    if c2_conversion:
        ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
        # original_keys: the name in the original dict (before renaming)
    else:
        original_keys = {x: x for x in ckpt_state_dict.keys()}
    ckpt_keys = sorted(ckpt_state_dict.keys())

    if not ckpt_keys:
        # Nothing to match against. Bail out here because the row-wise max below
        # cannot reduce over a zero-sized dimension.
        logger.warning("No weights in checkpoint matched with model.")
        return ckpt_state_dict

    def match(a, b):
        # Matched ckpt_key should be a complete (starts with '.') suffix.
        # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
        # but matches whatever_conv1 or mesh_head.whatever_conv1.
        return a == b or a.endswith("." + b)

    # get a matrix of string matches, where each (i, j) entry correspond to the size of the
    # ckpt_key string, if it matches
    match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
    match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
    # use the matched one with longest size in case of multiple matches
    max_match_size, idxs = match_matrix.max(1)
    # remove indices that correspond to no-match
    idxs[max_match_size == 0] = -1

    # matched_keys: matched checkpoint key --> matched model key
    matched_keys = {}
    result_state_dict = {}
    for idx_model, idx_ckpt in enumerate(idxs.tolist()):
        if idx_ckpt == -1:
            continue
        key_model = model_keys[idx_model]
        key_ckpt = ckpt_keys[idx_ckpt]
        value_ckpt = ckpt_state_dict[key_ckpt]
        shape_in_model = model_state_dict[key_model].shape
        if shape_in_model != value_ckpt.shape:
            logger.warning(
                "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
                    key_ckpt, value_ckpt.shape, key_model, shape_in_model
                )
            )
            logger.warning("{} will not be loaded. Please double check and see if this is desired.".format(key_ckpt))
            continue
        assert key_model not in result_state_dict
        result_state_dict[key_model] = value_ckpt
        if key_ckpt in matched_keys:  # already added to matched_keys
            logger.error(
                "Ambiguity found for {} in checkpoint! "
                "It matches at least two keys in the model ({} and {}).".format(
                    key_ckpt, key_model, matched_keys[key_ckpt]
                )
            )
            raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")
        matched_keys[key_ckpt] = key_model

    # logging:
    matched_model_keys = sorted(matched_keys.values())
    if len(matched_model_keys) == 0:
        logger.warning("No weights in checkpoint matched with model.")
        return ckpt_state_dict
    common_prefix = _longest_common_prefix(matched_model_keys)
    rev_matched_keys = {v: k for k, v in matched_keys.items()}
    # from here on: matched model key -> name in the original checkpoint
    original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys}
    model_key_groups = _group_keys_by_module(matched_model_keys, original_keys)
    table = []
    memo = set()  # model keys already reported as part of a group
    for key_model in matched_model_keys:
        if key_model in memo:
            continue
        if key_model in model_key_groups:
            group = model_key_groups[key_model]
            memo |= set(group)
            shapes = [tuple(model_state_dict[k].shape) for k in group]
            table.append(
                (
                    _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*",
                    _group_str([original_keys[k] for k in group]),
                    " ".join([str(x).replace(" ", "") for x in shapes]),
                )
            )
        else:
            key_checkpoint = original_keys[key_model]
            shape = str(tuple(model_state_dict[key_model].shape))
            table.append((key_model[len(common_prefix) :], key_checkpoint, shape))
    table_str = tabulate(table, tablefmt="pipe", headers=["Names in Model", "Names in Checkpoint", "Shapes"])
    logger.info(
        "Following weights matched with "
        + (f"submodule {common_prefix[:-1]}" if common_prefix else "model")
        + ":\n"
        + table_str
    )

    # keep unmatched checkpoint entries under their own names; dict membership
    # avoids rebuilding a set of matched keys for every checkpoint key
    unmatched_ckpt_keys = [k for k in ckpt_keys if k not in matched_keys]
    for k in unmatched_ckpt_keys:
        result_state_dict[k] = ckpt_state_dict[k]
    return result_state_dict
def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]):
    """
    Params in the same submodule are grouped together.

    A group is formed by every key sharing a submodule prefix (the key up to and
    including its last "."), provided it has more than one member and the
    members' checkpoint names share a non-empty common prefix.

    Args:
        keys: names of all parameters
        original_names: mapping from parameter name to their name in the checkpoint

    Returns:
        dict[name -> list of all names in its group, the name itself included];
        names that belong to no group are absent.
    """

    def _submodule_name(key):
        pos = key.rfind(".")
        if pos < 0:
            return None
        return key[: pos + 1]

    # Many parameters share a submodule, so drop duplicate prefixes (keeping
    # first-seen order) instead of rescanning `keys` once per parameter.
    unique_prefixes = dict.fromkeys(p for p in map(_submodule_name, keys) if p)

    ret = {}
    # shortest prefix first, so a key ends up in the outermost group that qualifies
    for prefix in sorted(unique_prefixes, key=len):
        group = [k for k in keys if k.startswith(prefix)]
        if len(group) <= 1:
            continue
        original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group])
        if len(original_name_lcp) == 0:
            # don't group weights if original names don't share prefix
            continue
        for k in group:
            # a key already claimed by a shorter prefix keeps that group
            ret.setdefault(k, group)
    return ret
+def _longest_common_prefix(names: List[str]) -> str:
+    """
+    ["abc.zfg", "abc.zef"] -> "abc."
+    """
+    names = [n.split(".") for n in names]
+    m1, m2 = min(names), max(names)
+    ret = [a for a, b in zip(m1, m2) if a == b]
+    ret = ".".join(ret) + "." if len(ret) else ""
+    return ret
+def _longest_common_prefix_str(names: List[str]) -> str:
+    m1, m2 = min(names), max(names)
+    lcp = [a for a, b in zip(m1, m2) if a == b]
+    lcp = "".join(lcp)
+    return lcp
def _group_str(names: List[str]) -> str:
    """
    Turn "common1", "common2", "common3" into "common{1,2,3}"

    The part that `_longest_common_prefix_str` reports as shared is written
    once and the per-name remainders are listed inside braces.
    """
    shared = _longest_common_prefix_str(names)
    remainders = ",".join(name[len(shared) :] for name in names)
    grouped = shared + "{" + remainders + "}"
    # add some simplification for BN specifically
    bn_shorthands = (
        ("bn_{beta,running_mean,running_var,gamma}", "bn_*"),
        ("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*"),
    )
    for verbose, short in bn_shorthands:
        grouped = grouped.replace(verbose, short)
    return grouped

detectron2/checkpoint/catalog.py ADDED Viewed

	@@ -0,0 +1,113 @@

+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+from detectron2.utils.file_io import PathHandler, PathManager
class ModelCatalog:
    """
    Store mappings from names to third-party models.

    `get` resolves a catalog name ("ImageNetPretrained/..." or
    "Caffe2Detectron/COCO/...") to the download URL of the corresponding
    Caffe2 model.
    """

    S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"

    # MSRA models have STRIDE_IN_1X1=True. False otherwise.
    # NOTE: all BN models here have fused BN into an affine layer.
    # As a result, you should only load them to a model with "FrozenBN".
    # Loading them to a model with regular BN or SyncBN is wrong.
    # Even when loaded to FrozenBN, it is still different from affine by an epsilon,
    # which should be negligible for training.
    # NOTE: all models here use PIXEL_STD=[1,1,1]
    # NOTE: Most of the BN models here are no longer used. We use the
    # re-converted pre-trained models under detectron2 model zoo instead.
    C2_IMAGENET_MODELS = {
        "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
        "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
        "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
        "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
        "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
        "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
        "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
    }

    C2_DETECTRON_PATH_FORMAT = "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl"  # noqa B950

    C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
    C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"

    # format: {model_name} -> part of the url
    C2_DETECTRON_MODELS = {
        "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW",  # noqa B950
        "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I",  # noqa B950
        "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7",  # noqa B950
        "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ",  # noqa B950
        "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB",  # noqa B950
        "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC",  # noqa B950
        "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT",  # noqa B950
        "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI",  # noqa B950
        "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q",  # noqa B950
        "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao",  # noqa B950
        "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L",  # noqa B950
        "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179",  # noqa B950
        "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2",  # noqa B950
    }

    @staticmethod
    def get(name):
        """
        Resolve a catalog name to its URL.

        Args:
            name (str): "Caffe2Detectron/COCO/<model>" or "ImageNetPretrained/<model>".

        Returns:
            str: the URL of the model file.

        Raises:
            RuntimeError: if `name` starts with neither known prefix.
            KeyError: if the prefix is known but the model is not listed.
        """
        # checked in order; the first matching prefix decides the resolver
        resolvers = (
            ("Caffe2Detectron/COCO", ModelCatalog._get_c2_detectron_baseline),
            ("ImageNetPretrained/", ModelCatalog._get_c2_imagenet_pretrained),
        )
        for known_prefix, resolve in resolvers:
            if name.startswith(known_prefix):
                return resolve(name)
        raise RuntimeError("model not present in the catalog: {}".format(name))

    @staticmethod
    def _get_c2_imagenet_pretrained(name):
        """Build the URL of an ImageNet-pretrained C2 model listed in `C2_IMAGENET_MODELS`."""
        model_name = name[len("ImageNetPretrained/") :]
        relative_path = ModelCatalog.C2_IMAGENET_MODELS[model_name]
        return "/".join([ModelCatalog.S3_C2_DETECTRON_PREFIX, relative_path])

    @staticmethod
    def _get_c2_detectron_baseline(name):
        """Build the URL of a C2 Detectron COCO baseline listed in `C2_DETECTRON_MODELS`."""
        name = name[len("Caffe2Detectron/COCO/") :]
        url_part = ModelCatalog.C2_DETECTRON_MODELS[name]
        dataset = (
            ModelCatalog.C2_DATASET_COCO_KEYPOINTS
            if "keypoint_rcnn" in name
            else ModelCatalog.C2_DATASET_COCO
        )
        # this one model is somehow different from others ..
        model_type = "rpn" if "35998355/rpn_R-50-C4_1x" in name else "generalized_rcnn"
        # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
        return ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
            prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX,
            url=url_part,
            type=model_type,
            dataset=dataset,
        )
class ModelCatalogHandler(PathHandler):
    """
    Resolve URL like catalog://.

    The part after the prefix is looked up in `ModelCatalog`, and the resulting
    path is handed back to `PathManager` to be fetched or opened.
    """

    PREFIX = "catalog://"

    def _get_supported_prefixes(self):
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        log = logging.getLogger(__name__)
        entry_name = path[len(self.PREFIX) :]
        catalog_path = ModelCatalog.get(entry_name)
        log.info("Catalog entry {} points to {}".format(path, catalog_path))
        return PathManager.get_local_path(catalog_path, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        # kwargs are forwarded to open() only, not to the local-path lookup
        local_path = self._get_local_path(path)
        return PathManager.open(local_path, mode, **kwargs)


# make "catalog://" paths resolvable through PathManager as soon as this module is imported
PathManager.register_handler(ModelCatalogHandler())