Commit Β·
9b99d13
1
Parent(s): e7dde59
add model, config, submodule and Space files
Browse files- .gitmodules +3 -0
- README.md +46 -8
- app.py +168 -0
- configs/faster_rcnn.py +249 -0
- mmdetection +1 -0
- packages.txt +8 -0
- requirements.txt +19 -0
.gitmodules
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[submodule "mmdetection"]
|
| 2 |
+
path = mmdetection
|
| 3 |
+
url = https://github.com/open-mmlab/mmdetection
|
README.md
CHANGED
|
@@ -1,14 +1,52 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
-
python_version: '3.13'
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
-
license:
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Floor Plan Detection
|
| 3 |
+
emoji: 📐
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 3.50.2
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Floor Plan Detection
|
| 14 |
+
|
| 15 |
+
Detects **walls** and **rooms** in architectural floor plan images using a
|
| 16 |
+
Faster R-CNN model (ResNet-50 + FPN) fine-tuned on the
|
| 17 |
+
[CubiCasa5k](https://zenodo.org/records/2613548) dataset.
|
| 18 |
+
|
| 19 |
+
## Usage
|
| 20 |
+
|
| 21 |
+
1. Upload a floor plan image (JPG or PNG).
|
| 22 |
+
2. Click **Run Detection**.
|
| 23 |
+
3. Detected walls (blue) and rooms (green) appear as bounding boxes.
|
| 24 |
+
|
| 25 |
+
## Model
|
| 26 |
+
|
| 27 |
+
- Architecture: Faster R-CNN · ResNet-50 · FPN
|
| 28 |
+
- Framework: [MMDetection](https://github.com/open-mmlab/mmdetection) v3.x
|
| 29 |
+
- Training data: CubiCasa5k (converted to COCO bbox format)
|
| 30 |
+
- Classes: `wall`, `room`
|
| 31 |
+
- Original repo: [xmarva/floorplan-detection](https://github.com/xmarva/floorplan-detection)
|
| 32 |
+
|
| 33 |
+
## Repo layout expected in this Space
|
| 34 |
+
|
| 35 |
+
```
|
| 36 |
+
.
|
| 37 |
+
├── app.py
|
| 38 |
+
├── requirements.txt
|
| 39 |
+
├── packages.txt
|
| 40 |
+
├── configs/
|
| 41 |
+
│   └── faster_rcnn.py          ← copy from repo configs/
|
| 42 |
+
├── weights/
|
| 43 |
+
│   └── faster_rcnn.pth         ← upload via Git LFS
|
| 44 |
+
├── mmdetection/                ← git submodule (commit cfd5d3a)
|
| 45 |
+
└── examples/
|
| 46 |
+
    └── example_cubicasa5k.png  ← optional demo image
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
## Notes
|
| 50 |
+
|
| 51 |
+
- Inference runs on **CPU** — expect ~30–60 s per image on the free tier.
|
| 52 |
+
- Confidence threshold is set to 0.40; adjust `SCORE_THRESH` in `app.py`.
|
app.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import cv2
|
| 4 |
+
import torch
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# MMDetection is installed as a local submodule
|
| 9 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "mmdetection"))
|
| 10 |
+
|
| 11 |
+
from mmdet.apis import init_detector, inference_detector
|
| 12 |
+
|
| 13 |
+
# ── Config & weights ─────────────────────────────────────────────────────────
# Resolve bundled files relative to this script (the same anchor used for the
# mmdetection sys.path entry) so startup does not depend on the working
# directory the app is launched from.
_APP_DIR = os.path.dirname(__file__)
CONFIG_FILE = os.path.join(_APP_DIR, "configs/faster_rcnn.py")
WEIGHTS_FILE = os.path.join(_APP_DIR, "weights/faster_rcnn.pth")
DEVICE = "cpu"

# Class names must match the 2 classes the model was trained on (CubiCasa5k)
CLASS_NAMES = ["wall", "room"]

# Per-class colours in OpenCV's BGR channel order. Read as RGB they are
# #3c3cdc (blue) and #32c850 (green), i.e. the colours the UI legend and the
# README promise. The previous tuples had the channels the other way round,
# so walls were drawn red while the legend showed blue.
CLASS_COLORS = {
    "wall": (220, 60, 60),  # blue
    "room": (80, 200, 50),  # green
}

SCORE_THRESH = 0.4  # minimum confidence to show a box
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# ── Load model once at startup ───────────────────────────────────────────────
# Built at import time and stored in a module-level name so that
# run_inference() reuses one detector instead of rebuilding it per request.
print("Loading Faster R-CNN model…")
model = init_detector(CONFIG_FILE, WEIGHTS_FILE, device=DEVICE)
print("Model ready.")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ── Inference helper ─────────────────────────────────────────────────────────
def _detections_per_class(result, num_classes):
    """Normalise a detector result to one ``(N, 5)`` array per class.

    Each row is ``[x1, y1, x2, y2, score]``. Two result layouts are accepted:

    * a list with one ``(N, 5)`` array per class (optionally wrapped in a
      ``(bbox_results, segm_results)`` tuple), as returned by MMDetection 2.x;
    * an object exposing ``pred_instances`` with ``bboxes``, ``scores`` and
      ``labels`` tensors, as returned by MMDetection 3.x (the API generation
      the shipped config is written for).
    """
    if isinstance(result, tuple):
        result = result[0]

    pred = getattr(result, "pred_instances", None)
    if pred is None:
        return [np.asarray(b, dtype=np.float32).reshape(-1, 5) for b in result]

    boxes = pred.bboxes.detach().cpu().numpy().reshape(-1, 4)
    scores = pred.scores.detach().cpu().numpy().reshape(-1, 1)
    labels = pred.labels.detach().cpu().numpy()
    dets = np.hstack([boxes, scores]).astype(np.float32)
    return [dets[labels == idx] for idx in range(num_classes)]


def run_inference(image: np.ndarray):
    """Detect walls and rooms in a floor plan and draw them on the image.

    Args:
        image: HxWx3 uint8 RGB array as delivered by the Gradio image input,
            or ``None`` when nothing was uploaded.

    Returns:
        A ``(annotated_rgb, summary)`` tuple: the input with one tinted,
        labelled box per detection scoring at least ``SCORE_THRESH``, and a
        Markdown summary. ``(None, "No image provided.")`` if ``image`` is
        ``None``.
    """
    if image is None:
        return None, "No image provided."

    # Gradio gives RGB; MMDetection expects BGR
    bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    result = inference_detector(model, bgr)
    per_class = _detections_per_class(result, len(CLASS_NAMES))

    annotated = bgr.copy()
    detection_lines = []
    counts = {name: 0 for name in CLASS_NAMES}

    for class_name, bboxes in zip(CLASS_NAMES, per_class):
        color = CLASS_COLORS[class_name]
        kept = bboxes[bboxes[:, 4] >= SCORE_THRESH]
        counts[class_name] = len(kept)

        for det in kept:
            x1, y1, x2, y2, score = det
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

            # Draw filled semi-transparent rectangle
            overlay = annotated.copy()
            cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
            cv2.addWeighted(overlay, 0.15, annotated, 0.85, 0, annotated)

            # Draw border
            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)

            # Label: normally sits on top of the box; when the box touches the
            # top edge there is no room above it, so draw the label just
            # inside the box instead of off-image.
            label = f"{class_name} {score:.2f}"
            (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            label_top = y1 - th - 6
            if label_top < 0:
                label_top = y1
            label_bottom = label_top + th + 6
            cv2.rectangle(annotated, (x1, label_top), (x1 + tw + 4, label_bottom), color, -1)
            cv2.putText(annotated, label, (x1 + 2, label_bottom - 4),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            detection_lines.append(
                f"  • {class_name.capitalize()} @ [{x1},{y1} → {x2},{y2}]  conf={score:.3f}"
            )

    # Back to RGB for Gradio
    out_rgb = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

    summary = (
        f"**Detected:** {counts.get('wall', 0)} wall(s) | {counts.get('room', 0)} room(s) "
        f"(threshold ≥ {SCORE_THRESH})\n\n"
    )
    if detection_lines:
        # Two trailing spaces force a Markdown hard break; a bare newline would
        # be folded into one paragraph when the summary is rendered.
        summary += "  \n".join(detection_lines)
    else:
        summary += "_No detections above threshold._"

    return out_rgb, summary
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# ── Gradio UI ────────────────────────────────────────────────────────────────
# Demo image offered under "Example images". The README lists it as optional,
# so the examples widget is only added when the file is actually present.
EXAMPLE_IMAGE = "examples/example_cubicasa5k.png"

# Legend swatches are derived from CLASS_COLORS (stored as BGR for OpenCV) so
# the legend cannot drift from the colours drawn on the image.
LEGEND_HEX = {
    name: "#{2:02x}{1:02x}{0:02x}".format(*CLASS_COLORS[name])
    for name in CLASS_NAMES
}

with gr.Blocks(
    title="Floor Plan Detection",
    theme=gr.themes.Base(
        primary_hue="blue",
        font=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
    ),
    css="""
    .container { max-width: 960px; margin: auto; }
    #title { text-align: center; padding: 1.5rem 0 0.5rem; }
    #subtitle, #legend { text-align: center; color: #666; margin-bottom: 1.5rem; font-size: 0.95rem; }
    #output-text { font-family: 'IBM Plex Mono', monospace; font-size: 0.85rem; }
    """,
) as demo:

    with gr.Column(elem_classes="container"):
        gr.Markdown("# 📐 Floor Plan Detection", elem_id="title")
        gr.Markdown(
            "Upload an architectural floor plan image to detect **walls** and **rooms** "
            "using a Faster R-CNN model fine-tuned on CubiCasa5k.",
            elem_id="subtitle",
        )

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(
                    label="Input Floor Plan",
                    type="numpy",
                    height=420,
                )
                run_btn = gr.Button("Run Detection", variant="primary", size="lg")

            with gr.Column():
                output_image = gr.Image(
                    label="Detections",
                    type="numpy",
                    height=420,
                )
                output_text = gr.Markdown(
                    label="Summary",
                    elem_id="output-text",
                    value="_Upload an image and click **Run Detection**._",
                )

        # Own element id: the intro paragraph above already uses "subtitle",
        # and HTML ids must be unique within the page.
        gr.Markdown(
            "**Legend:** "
            f"<span style='color:{LEGEND_HEX['wall']}'>■ Wall</span> "
            f"<span style='color:{LEGEND_HEX['room']}'>■ Room</span> · "
            "Model: Faster R-CNN · ResNet-50 · FPN · fine-tuned on CubiCasa5k",
            elem_id="legend",
        )

        if os.path.exists(EXAMPLE_IMAGE):
            gr.Examples(
                examples=[[EXAMPLE_IMAGE]],
                inputs=input_image,
                label="Example images",
            )

    run_btn.click(
        fn=run_inference,
        inputs=input_image,
        outputs=[output_image, output_text],
    )

if __name__ == "__main__":
    demo.launch()
|
configs/faster_rcnn.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Start from the `faster-rcnn_r50_fpn` model and the default runtime shipped
# in the bundled MMDetection checkout; everything below overrides or extends
# those base files.
_base_ = [
    '../mmdetection/configs/_base_/models/faster-rcnn_r50_fpn.py',
    '../mmdetection/configs/_base_/default_runtime.py',
]

# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
dataset_type = 'CocoDataset'
data_root = 'data/cubicasa5k_coco'
classes = ('wall', 'room')  # both wall and room classes are predicted

# Per-channel normalisation statistics. Not referenced anywhere else in this
# file.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs'),
]

# Single-scale, no-flip evaluation. A TestTimeAug wrapper around
# Resize / RandomFlip(prob=0.0) / PackDetInputs could replace the last two
# steps if multi-scale testing is wanted.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='PackDetInputs'),
]

# NOTE(review): the dataset ann_file values below already contain data_root.
# If the dataset class prepends data_root to relative annotation paths, they
# would resolve to data_root/data_root/... — confirm against the data layout.
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + '/train_coco_pt.json',
        data_prefix=dict(img=''),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        metainfo=dict(classes=classes),
    ),
)

val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + '/val_coco_pt.json',
        data_prefix=dict(img=''),
        test_mode=True,
        pipeline=test_pipeline,
        metainfo=dict(classes=classes),
    ),
)

# Testing reuses the validation split and its metric.
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + '/val_coco_pt.json',
    metric='bbox',
    format_only=False,
)

test_evaluator = val_evaluator

# ---------------------------------------------------------------------------
# Schedule: 12 epochs, validating after every epoch
# ---------------------------------------------------------------------------
max_epochs = 12
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=1,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

param_scheduler = [
    # Linear warm-up over the first 500 iterations.
    dict(
        type='LinearLR',
        start_factor=0.001,
        by_epoch=False,
        begin=0,
        end=500,
    ),
    # Learning rate is multiplied by 0.1 at epochs 8 and 11.
    dict(
        type='MultiStepLR',
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1,
    ),
]

optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD',
        lr=0.02,
        momentum=0.9,
        weight_decay=0.0001,
    ),
)
|
| 97 |
+
|
| 98 |
+
# Two-stage detector: ResNet-50 backbone -> FPN neck -> RPN -> RoI head with
# a 2-class (wall / room) box head.
model = dict(
    type='FasterRCNN',
    # --- feature extractor -------------------------------------------------
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='torchvision://resnet50',
        ),
    ),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
    ),
    # --- region proposal network -------------------------------------------
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
        ),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0),
    ),
    # --- second stage ------------------------------------------------------
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32],
        ),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=2,  # wall and room
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2],
            ),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0,
            ),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0),
        ),
    ),
    # --- training-time assignment / sampling -------------------------------
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
            ),
            allowed_border=-1,
            pos_weight=-1,
            debug=False,
        ),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1,
            ),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True,
            ),
            pos_weight=-1,
            debug=False,
        ),
    ),
    # --- inference-time proposal / detection filtering ---------------------
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
        ),
    ),
)
|
| 202 |
+
|
| 203 |
+
# Runtime hooks: timing, logging every 50 iterations, LR scheduling, a
# checkpoint after every epoch, sampler seeding and detection visualisation.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))

log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)

log_level = 'INFO'
# Checkpoint used to initialise training: COCO-pretrained Faster R-CNN R50-FPN.
load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
resume = False

# Visualisation backends: local files plus Weights & Biases.
vis_backends = [
    dict(type='LocalVisBackend'),
    dict(
        type='WandbVisBackend',
        init_kwargs={
            'project': 'cubicasa5k_faster_rcnn',
            'name': 'v1',
            'entity': 'ml-samurai',  # Replace with your W&B username or team name
        },
        define_metric_cfg={
            'loss': 'min',
            'accuracy': 'max'
        })
]

# Single visualizer definition. An earlier assignment to `visualizer` in this
# file carried its own copy of the backends and was overwritten by this one,
# so it was dead configuration and has been dropped.
visualizer = dict(type='DetLocalVisualizer',
                  vis_backends=vis_backends,
                  name='visualizer')
|
mmdetection
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit cfd5d3a985b0249de009b67d04f37263e11cdf3d
|
packages.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# System packages installed via apt before pip
|
| 2 |
+
# Required by OpenCV headless and MMDetection
|
| 3 |
+
libgl1
|
| 4 |
+
libglib2.0-0
|
| 5 |
+
libsm6
|
| 6 |
+
libxext6
|
| 7 |
+
libxrender-dev
|
| 8 |
+
ffmpeg
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ── Core ─────────────────────────────────────────────────────────────────────
|
| 2 |
+
# PyTorch CPU-only (saves ~500 MB vs the CUDA build on a CPU Space)
|
| 3 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 4 |
+
torch==1.13.1+cpu
|
| 5 |
+
torchvision==0.14.1+cpu
|
| 6 |
+
|
| 7 |
+
# ── MMDetection stack ────────────────────────────────────────────────────────
|
| 8 |
+
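# mmcv-full CPU wheel for torch 1.13 — prebuilt, no compilation needed
# NOTE(review): configs/faster_rcnn.py uses MMDetection 3.x-style keys
# (PackDetInputs, *_dataloader, OptimWrapper); confirm that mmcv-full 1.x is
# the right dependency here rather than mmcv 2.x + mmengine.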
|
| 9 |
+
mmcv-full==1.7.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.13/index.html
|
| 10 |
+
|
| 11 |
+
# MMDetection is included as a git submodule (./mmdetection) and installed
|
| 12 |
+
# via a sys.path entry added in app.py (not pip), so it is not listed here.
|
| 13 |
+
|
| 14 |
+
# ── Vision / image handling ──────────────────────────────────────────────────
|
| 15 |
+
opencv-python-headless>=4.7.0
|
| 16 |
+
numpy>=1.23.0,<2.0
|
| 17 |
+
|
| 18 |
+
# ── Gradio ───────────────────────────────────────────────────────────────────
|
| 19 |
+
gradio>=3.50.0,<4.0.0
|