import os import json import numpy as np from typing import Dict, Any import cv2 import torch import logging import base64 import uvicorn from fastapi import FastAPI, File, UploadFile, HTTPException from fastapi.responses import HTMLResponse, JSONResponse from mmdet.apis import init_detector, inference_detector import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "mmdetection")) logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) BASE_DIR = os.path.dirname(os.path.abspath(__file__)) CONFIG_FILE = os.path.join(BASE_DIR, "configs", "faster_rcnn.py") CHECKPOINT_FILE = os.path.join(BASE_DIR, "weights", "faster_rcnn_latest.pth") MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB SCORE_THRESH = 0.3 # lower than default to catch more walls CLASS_COLORS = { 0: (220, 60, 60), # wall — red (RGB) 1: (50, 200, 80), # room — green (RGB) } CLASS_NAMES = {0: "wall", 1: "room"} # ── Device ─────────────────────────────────────────────────────────────────── def determine_device(): if torch.cuda.is_available(): try: torch.cuda.init() return "cuda:0" except Exception as e: logger.warning(f"CUDA failed: {e}. Using CPU.") return "cpu" # ── Model load ─────────────────────────────────────────────────────────────── device = determine_device() logger.info(f"Loading Faster R-CNN on {device}…") model = init_detector(CONFIG_FILE, CHECKPOINT_FILE, device=device) logger.info("Model ready.") # ── Result processing (mirrors original run.py exactly) ────────────────────── def process_inference_result(result) -> Dict[str, Any]: bboxes = result.pred_instances.bboxes.cpu().numpy() labels = result.pred_instances.labels.cpu().numpy() scores = result.pred_instances.scores.cpu().numpy() walls, rooms = [], [] for i, (bbox, label, score) in enumerate(zip(bboxes, labels, scores)): if score < SCORE_THRESH: continue x1, y1, x2, y2 = bbox item = { "id": f"{'wall' if label == 0 else 'room'}_{i+1}", "position": { "start": {"x": float(x1), "y": float(y1)}, "end": {"x": float(x2), "y": float(y2)} }, "confidence": float(score) } if label == 0: walls.append(item) else: rooms.append(item) all_scores = scores[scores >= SCORE_THRESH] return { "type": "floor_plan", "confidence": float(np.mean(all_scores)) if len(all_scores) else 0.0, "detectionResults": {"walls": walls, "rooms": rooms} } # ── Visualisation ───────────────────────────────────────────────────────────── def draw_detections(img_rgb: np.ndarray, result) -> np.ndarray: annotated = img_rgb.copy() bboxes = result.pred_instances.bboxes.cpu().numpy() labels = result.pred_instances.labels.cpu().numpy() scores = result.pred_instances.scores.cpu().numpy() for bbox, label, score in zip(bboxes, labels, scores): if score < SCORE_THRESH or label not in CLASS_NAMES: continue color = CLASS_COLORS[label] name = CLASS_NAMES[label] x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]) # Semi-transparent fill overlay = annotated.copy() cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1) cv2.addWeighted(overlay, 0.15, annotated, 0.85, 0, annotated) # Border cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2) # Label lbl = f"{name} {score:.2f}" (tw, th), _ = cv2.getTextSize(lbl, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) cv2.rectangle(annotated, (x1, y1-th-6), (x1+tw+4, y1), color, -1) cv2.putText(annotated, lbl, (x1+2, y1-4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) return annotated # ── FastAPI ─────────────────────────────────────────────────────────────────── app = FastAPI() HTML = """
Faster R-CNN · ResNet-101 · FPN · fine-tuned on CubiCasa5k
Input
Detections