Spaces:

intisarhasnain
/

floorplan-detection

Paused

App Files Files Community

intisarhasnain committed on 10 days ago

Commit

9b99d13

1 Parent(s): e7dde59

add model, config, submodule and Space files

Browse files

Files changed (7) hide show

.gitmodules +3 -0
README.md +46 -8
app.py +168 -0
configs/faster_rcnn.py +249 -0
mmdetection +1 -0
packages.txt +8 -0
requirements.txt +19 -0

.gitmodules ADDED Viewed

	@@ -0,0 +1,3 @@

+[submodule "mmdetection"]
+	path = mmdetection
+	url = https://github.com/open-mmlab/mmdetection

README.md CHANGED Viewed

@@ -1,14 +1,52 @@
 ---
-title: Floorplan Detection
-emoji: 🐨
-colorFrom: gray
-colorTo: indigo
 sdk: gradio
-sdk_version: 6.14.0
-python_version: '3.13'
 app_file: app.py
 pinned: false
-license: other
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Floor Plan Detection
+emoji: 🏠
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 3.50.2
 app_file: app.py
 pinned: false
+license: mit
 ---
+# Floor Plan Detection
+Detects **walls** and **rooms** in architectural floor plan images using a
+Faster R-CNN model (ResNet-50 + FPN) fine-tuned on the
+[CubiCasa5k](https://zenodo.org/records/2613548) dataset.
+## Usage
+1. Upload a floor plan image (JPG or PNG).
+2. Click **Run Detection**.
+3. Detected walls (blue) and rooms (green) appear as bounding boxes.
+## Model
+- Architecture: Faster R-CNN · ResNet-50 · FPN (see `depth=50` in `configs/faster_rcnn.py`)
+- Framework: [MMDetection](https://github.com/open-mmlab/mmdetection) v3.x (the config uses the 3.x / MMEngine format)
+- Training data: CubiCasa5k (converted to COCO bbox format)
+- Classes: `wall`, `room`
+- Original repo: [xmarva/floorplan-detection](https://github.com/xmarva/floorplan-detection)
+## Repo layout expected in this Space
+```
+.
+├── app.py
+├── requirements.txt
+├── packages.txt
+├── configs/
+│   └── faster_rcnn.py          ← copy from repo configs/
+├── weights/
+│   └── faster_rcnn.pth         ← upload via Git LFS
+├── mmdetection/                ← git submodule (commit cfd5d3a)
+└── examples/
+    └── example_cubicasa5k.png  ← optional demo image
+```
+## Notes
+- Inference runs on **CPU** — expect ~30–60 s per image on the free tier.
+- Confidence threshold is set to 0.40; adjust `SCORE_THRESH` in `app.py`.

app.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import gradio as gr
+import numpy as np
+import cv2
+import torch
+import sys
+import os
+# MMDetection is installed as a local submodule
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "mmdetection"))
+from mmdet.apis import init_detector, inference_detector
+# ── Config & weights ────────────────────────────────────────────────────────
+CONFIG_FILE  = "configs/faster_rcnn.py"
+WEIGHTS_FILE = "weights/faster_rcnn.pth"
+DEVICE       = "cpu"
+# Class names must match the 2 classes the model was trained on (CubiCasa5k)
+CLASS_NAMES  = ["wall", "room"]
+# Distinct BGR colours per class for visualisation
+CLASS_COLORS = {
+    "wall": (60,  60, 220),   # red-ish
+    "room": (50, 200,  80),   # green
+}
+SCORE_THRESH = 0.4            # minimum confidence to show a box
+# ── Load model once at startup ───────────────────────────────────────────────
+print("Loading Faster R-CNN model…")
+model = init_detector(CONFIG_FILE, WEIGHTS_FILE, device=DEVICE)
+print("Model ready.")
+# ── Inference helper ─────────────────────────────────────────────────────────
+def run_inference(image: np.ndarray):
+    """
+    image : HxWx3 numpy array (RGB, uint8) from Gradio
+    returns: annotated image (RGB), summary text
+    """
+    if image is None:
+        return None, "No image provided."
+    # Gradio gives RGB; MMDetection expects BGR
+    bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    result = inference_detector(model, bgr)
+    # result is a list of arrays, one per class
+    # each array: shape (N, 5)  →  [x1, y1, x2, y2, score]
+    annotated = bgr.copy()
+    detection_lines = []
+    total_walls = 0
+    total_rooms = 0
+    for class_idx, (class_name, bboxes) in enumerate(zip(CLASS_NAMES, result)):
+        color = CLASS_COLORS[class_name]
+        kept  = bboxes[bboxes[:, 4] >= SCORE_THRESH] if len(bboxes) else bboxes
+        for det in kept:
+            x1, y1, x2, y2, score = det
+            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
+            # Draw filled semi-transparent rectangle
+            overlay = annotated.copy()
+            cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
+            cv2.addWeighted(overlay, 0.15, annotated, 0.85, 0, annotated)
+            # Draw border
+            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
+            # Label
+            label = f"{class_name} {score:.2f}"
+            (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+            cv2.rectangle(annotated, (x1, y1 - th - 6), (x1 + tw + 4, y1), color, -1)
+            cv2.putText(annotated, label, (x1 + 2, y1 - 4),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+            detection_lines.append(
+                f"  • {class_name.capitalize()} @ [{x1},{y1} → {x2},{y2}]  conf={score:.3f}"
+            )
+        if class_name == "wall":
+            total_walls = len(kept)
+        elif class_name == "room":
+            total_rooms = len(kept)
+    # Back to RGB for Gradio
+    out_rgb = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
+    summary = (
+        f"**Detected:** {total_walls} wall(s)  |  {total_rooms} room(s)  "
+        f"(threshold ≥ {SCORE_THRESH})\n\n"
+    )
+    if detection_lines:
+        summary += "\n".join(detection_lines)
+    else:
+        summary += "_No detections above threshold._"
+    return out_rgb, summary
+# ── Gradio UI ────────────────────────────────────────────────────────────────
+with gr.Blocks(
+    title="Floor Plan Detection",
+    theme=gr.themes.Base(
+        primary_hue="blue",
+        font=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
+    ),
+    css="""
+    .container { max-width: 960px; margin: auto; }
+    #title { text-align: center; padding: 1.5rem 0 0.5rem; }
+    #subtitle { text-align: center; color: #666; margin-bottom: 1.5rem; font-size: 0.95rem; }
+    #output-text { font-family: 'IBM Plex Mono', monospace; font-size: 0.85rem; }
+    """,
+) as demo:
+    with gr.Column(elem_classes="container"):
+        gr.Markdown("# 🏠 Floor Plan Detection", elem_id="title")
+        gr.Markdown(
+            "Upload an architectural floor plan image to detect **walls** and **rooms** "
+            "using a Faster R-CNN model fine-tuned on CubiCasa5k.",
+            elem_id="subtitle",
+        )
+        with gr.Row():
+            with gr.Column():
+                input_image = gr.Image(
+                    label="Input Floor Plan",
+                    type="numpy",
+                    height=420,
+                )
+                run_btn = gr.Button("Run Detection", variant="primary", size="lg")
+            with gr.Column():
+                output_image = gr.Image(
+                    label="Detections",
+                    type="numpy",
+                    height=420,
+                )
+                output_text = gr.Markdown(
+                    label="Summary",
+                    elem_id="output-text",
+                    value="_Upload an image and click **Run Detection**._",
+                )
+        gr.Markdown(
+            "**Legend:** "
+            "<span style='color:#3c3cdc'>■ Wall</span> &nbsp; "
+            "<span style='color:#32c850'>■ Room</span> &nbsp;·&nbsp; "
+            "Model: Faster R-CNN · ResNet-101 · FPN · fine-tuned on CubiCasa5k",
+            elem_id="subtitle",
+        )
+        gr.Examples(
+            examples=[["examples/example_cubicasa5k.png"]],
+            inputs=input_image,
+            label="Example images",
+        )
+    run_btn.click(
+        fn=run_inference,
+        inputs=input_image,
+        outputs=[output_image, output_text],
+    )
+if __name__ == "__main__":
+    demo.launch()

configs/faster_rcnn.py ADDED Viewed

	@@ -0,0 +1,249 @@

+# Faster R-CNN config for the two-class (wall / room) CubiCasa5k detector.
+# Inherits the R50-FPN model definition and the default runtime settings from
+# the mmdetection submodule; paths are relative to this config file.
+_base_ = [
+    '../mmdetection/configs/_base_/models/faster-rcnn_r50_fpn.py',
+    '../mmdetection/configs/_base_/default_runtime.py'
+]
+# Dataset: COCO-format annotations under data_root.
+dataset_type = 'CocoDataset'
+data_root = 'data/cubicasa5k_coco'
+classes = ('wall', 'room')  # Include both wall and room classes
+# NOTE(review): img_norm_cfg is not referenced by any other setting in this
+# file — confirm whether it is still needed.
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+# Training pipeline: load image + boxes, resize to (1333, 800) keeping the
+# aspect ratio, flip with probability 0.5, then pack the detector inputs.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PackDetInputs')
+]
+# Test pipeline: single-scale resize, no flipping, no annotations loaded.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+    # If you want to keep the original test pipeline's MultiScaleFlipAug, use this instead:
+    # dict(type='TestTimeAug',
+    #      transforms=[
+    #          dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+    #          dict(type='RandomFlip', prob=0.0),
+    #          dict(type='PackDetInputs')
+    #      ])
+    dict(type='PackDetInputs')
+]
+# Training data loader: batches of 2, shuffled, grouped by aspect ratio;
+# images without ground truth or smaller than 32 px are filtered out.
+# NOTE(review): ann_file repeats the data_root prefix; if relative ann_file
+# paths are joined onto data_root the resolved path would contain it twice —
+# confirm against the dataset layout used for training.
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    batch_sampler=dict(type='AspectRatioBatchSampler'),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='data/cubicasa5k_coco/train_coco_pt.json',
+        data_prefix=dict(img=''),
+        filter_cfg=dict(filter_empty_gt=True, min_size=32),
+        pipeline=train_pipeline,
+        metainfo=dict(classes=classes)))
+# Validation data loader: one image at a time, fixed order, test pipeline.
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='data/cubicasa5k_coco/val_coco_pt.json',
+        data_prefix=dict(img=''),
+        test_mode=True,
+        pipeline=test_pipeline,
+        metainfo=dict(classes=classes)))
+# Testing reuses the validation loader and evaluator unchanged.
+test_dataloader = val_dataloader
+# COCO bounding-box metric computed against the validation annotations.
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file='data/cubicasa5k_coco/val_coco_pt.json',
+    metric='bbox',
+    format_only=False)
+test_evaluator = val_evaluator
+# Schedule: 12 epochs with validation after every epoch.
+max_epochs = 12
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+# Learning rate: linear warm-up over the first 500 iterations, then a step
+# schedule that multiplies the rate by 0.1 at epochs 8 and 11.
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=max_epochs,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
+# SGD with momentum and weight decay.
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
+# Detector definition: Faster R-CNN with a ResNet-50 backbone, FPN neck, RPN
+# proposal head and a two-class box head. The same keys also exist in the
+# `_base_` model file; presumably these values are merged over it — confirm.
+model = dict(
+    type='FasterRCNN',
+    # Backbone: ResNet-50, initialised from the torchvision checkpoint.
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    # Neck: FPN mapping the four backbone outputs to five 256-channel levels.
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    # Region proposal head: one anchor scale, three aspect ratios, five strides.
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[8],
+            ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[.0, .0, .0, .0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    # RoI head: 7x7 RoIAlign features fed to a shared two-FC box head.
+    roi_head=dict(
+        type='StandardRoIHead',
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),
+        bbox_head=dict(
+            type='Shared2FCBBoxHead',
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=2,  # Changed to 2 for wall and room
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0., 0., 0., 0.],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+    # Model training and testing settings
+    train_cfg=dict(
+        # RPN target assignment and sampling (256 anchors, half positive).
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=-1,
+            pos_weight=-1,
+            debug=False),
+        # Proposal filtering applied during training.
+        rpn_proposal=dict(
+            nms_pre=2000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        # Box-head target assignment and sampling (512 RoIs, a quarter positive).
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=False,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            pos_weight=-1,
+            debug=False)),
+    # Inference settings: proposal NMS, then per-class NMS on boxes scoring at
+    # least 0.05, keeping at most 100 detections per image.
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=1000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100)))
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=50),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', interval=1),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='DetVisualizationHook'))
+visualizer = dict(
+    type='DetLocalVisualizer',
+    vis_backends=[
+        dict(type='LocalVisBackend'),
+        dict(type='WandbVisBackend',
+             init_kwargs={
+                 'project': 'cubicasa5k_faster_rcnn',
+                 'name': 'v1',
+                 'entity': 'ml-samurai',  # Replace with your W&B username or team name
+             })
+    ],
+    name='visualizer')
+log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
+log_level = 'INFO'
+load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
+resume = False
+vis_backends = [
+    dict(type='LocalVisBackend'),
+    dict(
+        type='WandbVisBackend',
+        init_kwargs={
+            'project': 'cubicasa5k_faster_rcnn',
+            'name': 'v1',
+            'entity': 'ml-samurai',  # Replace with your W&B username or team name
+        },
+        define_metric_cfg={
+            'loss': 'min',
+            'accuracy': 'max'
+        })
+]
+visualizer = dict(type='DetLocalVisualizer',
+                  vis_backends=vis_backends,
+                  name='visualizer')
+#load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

mmdetection ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit cfd5d3a985b0249de009b67d04f37263e11cdf3d

packages.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+libgl1
+libglib2.0-0
+libsm6
+libxext6
+libxrender-dev
+ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,19 @@

+# ── Core ────────────────────────────────────────────────────────────────────
+# PyTorch CPU-only (saves ~500 MB vs the CUDA build on a CPU Space)
+--extra-index-url https://download.pytorch.org/whl/cpu
+# Prebuilt CPU wheels of mmcv for torch 1.13. pip documents --find-links as a
+# global option, so it sits on its own line instead of being appended to a
+# requirement line.
+--find-links https://download.openmmlab.com/mmcv/dist/cpu/torch1.13/index.html
+torch==1.13.1+cpu
+torchvision==0.14.1+cpu
+# ── MMDetection stack ────────────────────────────────────────────────────────
+# configs/faster_rcnn.py is written in the MMDetection 3.x format (MMEngine
+# loops, PackDetInputs), which needs mmengine and mmcv 2.x rather than
+# mmcv-full 1.x.
+# NOTE(review): confirm the pinned mmcv wheel exists for this torch build and
+# falls inside the range accepted by the mmdetection submodule commit.
+mmengine>=0.7.1,<1.0.0
+mmcv==2.0.1
+# MMDetection itself is not pip-installed: app.py puts the ./mmdetection
+# submodule on sys.path. Its runtime dependencies therefore have to be listed
+# here explicitly.
+matplotlib
+pycocotools
+scipy
+shapely
+six
+terminaltables
+tqdm
+# ── Vision / image handling ──────────────────────────────────────────────────
+opencv-python-headless>=4.7.0
+numpy>=1.23.0,<2.0
+# ── Gradio ───────────────────────────────────────────────────────────────────
+gradio>=3.50.0,<4.0.0