Spaces:

intisarhasnain
/

floorplan-detection

Sleeping

App Files Files Community

intisarhasnain committed on May 23

Commit

4fae684

1 Parent(s): 037fe03

align app.py with original run.py: BGR->RGB fix, lower wall threshold

Browse files

Files changed (1) hide show

app.py +194 -119

app.py CHANGED Viewed

@@ -1,171 +1,246 @@
 import numpy as np
 import cv2
-import sys
-import os
 import base64
-import io
-from fastapi import FastAPI, File, UploadFile
-from fastapi.responses import HTMLResponse, JSONResponse
-from PIL import Image
 import uvicorn
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "mmdetection"))
-from mmdet.apis import init_detector, inference_detector
-BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
-CONFIG_FILE  = os.path.join(BASE_DIR, "configs", "faster_rcnn.py")
-WEIGHTS_FILE = os.path.join(BASE_DIR, "weights", "faster_rcnn_latest.pth")
-DEVICE       = "cpu"
-CLASS_NAMES  = ["wall", "room"]
-CLASS_COLORS = {"wall": (60, 60, 220), "room": (50, 200, 80)}
-SCORE_THRESH = 0.4
-print("Loading model…")
-model = init_detector(CONFIG_FILE, WEIGHTS_FILE, device=DEVICE)
-print("Model ready.")
 app = FastAPI()
-HTML = """
-<!DOCTYPE html>
 <html>
 <head>
   <title>Floor Plan Detection</title>
   <style>
-    body { font-family: monospace; max-width: 960px; margin: 40px auto; padding: 0 20px; background: #0f0f0f; color: #e0e0e0; }
-    h1 { color: #7eb8f7; }
-    .row { display: flex; gap: 24px; flex-wrap: wrap; }
-    .col { flex: 1; min-width: 300px; }
-    img { max-width: 100%; border: 1px solid #333; border-radius: 6px; }
-    input[type=file] { display: none; }
-    label.upload { display: inline-block; padding: 10px 20px; background: #1e3a5f; color: #7eb8f7;
-                   border: 1px solid #7eb8f7; border-radius: 4px; cursor: pointer; }
-    label.upload:hover { background: #2a4f7f; }
-    button { padding: 10px 28px; background: #7eb8f7; color: #0f0f0f; border: none;
-             border-radius: 4px; cursor: pointer; font-weight: bold; font-size: 1rem; }
-    button:hover { background: #5a9ee0; }
-    #preview, #result { width: 100%; min-height: 200px; background: #1a1a1a;
-                        border: 1px solid #333; border-radius: 6px; display: flex;
-                        align-items: center; justify-content: center; color: #555; }
-    #summary { margin-top: 12px; background: #1a1a1a; padding: 12px; border-radius: 6px;
-               border: 1px solid #333; white-space: pre-wrap; min-height: 60px; font-size: 0.9rem; }
-    .legend span { display: inline-block; width: 12px; height: 12px; margin-right: 4px; border-radius: 2px; }
-    .loading { color: #7eb8f7; }
   </style>
 </head>
 <body>
   <h1>🏠 Floor Plan Detection</h1>
-  <p>Upload a floor plan image to detect <strong>walls</strong> and <strong>rooms</strong>
-     using Faster R-CNN fine-tuned on CubiCasa5k.</p>
-  <div style="margin-bottom:16px; display:flex; gap:12px; align-items:center; flex-wrap:wrap;">
-    <label class="upload" for="fileInput">📂 Choose Image</label>
-    <input type="file" id="fileInput" accept="image/*">
-    <button onclick="runDetection()">▶ Run Detection</button>
-    <span id="filename" style="color:#555">No file chosen</span>
   </div>
   <div class="row">
     <div class="col">
-      <p style="color:#888; margin:0 0 6px">Input</p>
-      <div id="preview">No image loaded</div>
     </div>
     <div class="col">
-      <p style="color:#888; margin:0 0 6px">Detections</p>
-      <div id="result">Run detection to see results</div>
     </div>
   </div>
   <div id="summary">Upload an image and click Run Detection.</div>
-  <p class="legend" style="margin-top:16px">
-    <strong>Legend:</strong>
-    <span style="background:#3c3cdc"></span>Wall &nbsp;
-    <span style="background:#32c850"></span>Room
-  </p>
 <script>
-  let selectedFile = null;
-  document.getElementById('fileInput').addEventListener('change', function(e) {
-    selectedFile = e.target.files[0];
-    if (!selectedFile) return;
-    document.getElementById('filename').textContent = selectedFile.name;
-    const reader = new FileReader();
-    reader.onload = ev => {
-      document.getElementById('preview').innerHTML = `<img src="${ev.target.result}">`;
-    };
-    reader.readAsDataURL(selectedFile);
   });
-  async function runDetection() {
-    if (!selectedFile) { alert('Please choose an image first.'); return; }
-    document.getElementById('result').innerHTML = '<span class="loading">Running detection… (may take 30–60s on CPU)</span>';
     document.getElementById('summary').textContent = 'Processing…';
-    const form = new FormData();
-    form.append('file', selectedFile);
     try {
-      const resp = await fetch('/detect', { method: 'POST', body: form });
-      const data = await resp.json();
-      document.getElementById('result').innerHTML = `<img src="data:image/jpeg;base64,${data.image}">`;
-      document.getElementById('summary').textContent = data.summary;
-    } catch(err) {
-      document.getElementById('result').innerHTML = 'Error — see console.';
-      document.getElementById('summary').textContent = String(err);
     }
   }
 </script>
 </body>
-</html>
-"""
 @app.get("/", response_class=HTMLResponse)
 def index():
     return HTML
 @app.post("/detect")
-async def detect(file: UploadFile = File(...)):
-    contents = await file.read()
-    arr = np.frombuffer(contents, np.uint8)
-    bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
-    if bgr is None:
-        return JSONResponse({"error": "Could not decode image"}, status_code=400)
-    result   = inference_detector(model, bgr)
-    annotated = bgr.copy()
-    lines     = []
-    counts    = {"wall": 0, "room": 0}
-    pred   = result.pred_instances
-    bboxes = pred.bboxes.cpu().numpy()
-    scores = pred.scores.cpu().numpy()
-    labels = pred.labels.cpu().numpy()
-    for bbox, score, label in zip(bboxes, scores, labels):
-        if score < SCORE_THRESH or label >= len(CLASS_NAMES):
-            continue
-        name  = CLASS_NAMES[label]
-        color = CLASS_COLORS[name]
-        x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
-        overlay = annotated.copy()
-        cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
-        cv2.addWeighted(overlay, 0.15, annotated, 0.85, 0, annotated)
-        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
-        lbl = f"{name} {score:.2f}"
-        (tw, th), _ = cv2.getTextSize(lbl, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
-        cv2.rectangle(annotated, (x1, y1-th-6), (x1+tw+4, y1), color, -1)
-        cv2.putText(annotated, lbl, (x1+2, y1-4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)
-        counts[name] += 1
-        lines.append(f"  • {name.capitalize()} [{x1},{y1} → {x2},{y2}]  conf={score:.3f}")
-    _, buf = cv2.imencode(".jpg", annotated, [cv2.IMWRITE_JPEG_QUALITY, 90])
     b64 = base64.b64encode(buf).decode()
-    summary = f"Detected: {counts['wall']} wall(s)  |  {counts['room']} room(s)  (threshold >= {SCORE_THRESH})\n\n"
-    summary += "\n".join(lines) if lines else "No detections above threshold."
-    return {"image": b64, "summary": summary}
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)

+import os
+import json
 import numpy as np
+from typing import Dict, Any
 import cv2
+import torch
+import logging
 import base64
 import uvicorn
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import HTMLResponse, JSONResponse
+from mmdet.apis import init_detector, inference_detector
+import sys
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "mmdetection"))
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+BASE_DIR       = os.path.dirname(os.path.abspath(__file__))
+CONFIG_FILE    = os.path.join(BASE_DIR, "configs", "faster_rcnn.py")
+CHECKPOINT_FILE = os.path.join(BASE_DIR, "weights", "faster_rcnn_latest.pth")
+MAX_FILE_SIZE  = 10 * 1024 * 1024  # 10 MB
+SCORE_THRESH   = 0.3  # lower than default to catch more walls
+CLASS_COLORS = {
+    0: (220, 60,  60),   # wall  — red  (RGB)
+    1: (50,  200, 80),   # room  — green (RGB)
+}
+CLASS_NAMES = {0: "wall", 1: "room"}
+# ── Device ───────────────────────────────────────────────────────────────────
+def determine_device():
+    if torch.cuda.is_available():
+        try:
+            torch.cuda.init()
+            return "cuda:0"
+        except Exception as e:
+            logger.warning(f"CUDA failed: {e}. Using CPU.")
+    return "cpu"
+# ── Model load ───────────────────────────────────────────────────────────────
+device = determine_device()  # "cuda:0" if CUDA initialises cleanly, otherwise "cpu"
+logger.info(f"Loading Faster R-CNN on {device}…")
+model = init_detector(CONFIG_FILE, CHECKPOINT_FILE, device=device)  # built once at import time; reused by every /detect request
+logger.info("Model ready.")
+# ── Result processing (mirrors original run.py exactly) ──────────────────────
+def process_inference_result(result) -> Dict[str, Any]:
+    bboxes = result.pred_instances.bboxes.cpu().numpy()
+    labels = result.pred_instances.labels.cpu().numpy()
+    scores = result.pred_instances.scores.cpu().numpy()
+    walls, rooms = [], []
+    for i, (bbox, label, score) in enumerate(zip(bboxes, labels, scores)):
+        if score < SCORE_THRESH:
+            continue
+        x1, y1, x2, y2 = bbox
+        item = {
+            "id": f"{'wall' if label == 0 else 'room'}_{i+1}",
+            "position": {
+                "start": {"x": float(x1), "y": float(y1)},
+                "end":   {"x": float(x2), "y": float(y2)}
+            },
+            "confidence": float(score)
+        }
+        if label == 0:
+            walls.append(item)
+        else:
+            rooms.append(item)
+    all_scores = scores[scores >= SCORE_THRESH]
+    return {
+        "type": "floor_plan",
+        "confidence": float(np.mean(all_scores)) if len(all_scores) else 0.0,
+        "detectionResults": {"walls": walls, "rooms": rooms}
+    }
+# ── Visualisation ─────────────────────────────────────────────────────────────
+def draw_detections(img_rgb: np.ndarray, result) -> np.ndarray:
+    annotated = img_rgb.copy()
+    bboxes = result.pred_instances.bboxes.cpu().numpy()
+    labels = result.pred_instances.labels.cpu().numpy()
+    scores = result.pred_instances.scores.cpu().numpy()
+    for bbox, label, score in zip(bboxes, labels, scores):
+        if score < SCORE_THRESH or label not in CLASS_NAMES:
+            continue
+        color = CLASS_COLORS[label]
+        name  = CLASS_NAMES[label]
+        x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
+        # Semi-transparent fill
+        overlay = annotated.copy()
+        cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
+        cv2.addWeighted(overlay, 0.15, annotated, 0.85, 0, annotated)
+        # Border
+        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
+        # Label
+        lbl = f"{name} {score:.2f}"
+        (tw, th), _ = cv2.getTextSize(lbl, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+        cv2.rectangle(annotated, (x1, y1-th-6), (x1+tw+4, y1), color, -1)
+        cv2.putText(annotated, lbl, (x1+2, y1-4),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+    return annotated
+# ── FastAPI ───────────────────────────────────────────────────────────────────
 app = FastAPI()
+HTML = """<!DOCTYPE html>
 <html>
 <head>
   <title>Floor Plan Detection</title>
   <style>
+    *{box-sizing:border-box;margin:0;padding:0}
+    body{font-family:monospace;background:#0f0f0f;color:#e0e0e0;padding:32px 24px}
+    h1{color:#7eb8f7;margin-bottom:8px}
+    p.sub{color:#888;margin-bottom:24px;font-size:.9rem}
+    .controls{display:flex;gap:12px;align-items:center;flex-wrap:wrap;margin-bottom:24px}
+    input[type=file]{display:none}
+    label.btn{padding:9px 18px;background:#1e3a5f;color:#7eb8f7;border:1px solid #7eb8f7;border-radius:4px;cursor:pointer}
+    label.btn:hover{background:#2a4f7f}
+    button{padding:9px 24px;background:#7eb8f7;color:#0f0f0f;border:none;border-radius:4px;cursor:pointer;font-weight:bold;font-size:.95rem}
+    button:hover{background:#5a9ee0}
+    #fname{color:#555;font-size:.85rem}
+    .row{display:flex;gap:20px;flex-wrap:wrap;margin-bottom:16px}
+    .col{flex:1;min-width:280px}
+    .col p{color:#888;font-size:.8rem;margin-bottom:6px}
+    .imgbox{background:#1a1a1a;border:1px solid #2a2a2a;border-radius:6px;min-height:220px;
+            display:flex;align-items:center;justify-content:center;color:#444;overflow:hidden}
+    .imgbox img{max-width:100%;display:block}
+    #summary{background:#1a1a1a;border:1px solid #2a2a2a;border-radius:6px;padding:14px;
+             white-space:pre-wrap;font-size:.85rem;min-height:60px;color:#ccc}
+    .legend{margin-top:12px;font-size:.85rem;color:#888}
+    .dot{display:inline-block;width:10px;height:10px;border-radius:2px;margin-right:4px;vertical-align:middle}
+    .loading{color:#7eb8f7;animation:pulse 1.2s infinite}
+    @keyframes pulse{0%,100%{opacity:1}50%{opacity:.4}}
   </style>
 </head>
 <body>
   <h1>🏠 Floor Plan Detection</h1>
+  <p class="sub">Faster R-CNN · ResNet-101 · FPN · fine-tuned on CubiCasa5k</p>
+  <div class="controls">
+    <label class="btn" for="fi">📂 Choose Image</label>
+    <input type="file" id="fi" accept="image/jpeg,image/png">
+    <button onclick="detect()">▶ Run Detection</button>
+    <span id="fname">No file chosen</span>
   </div>
   <div class="row">
     <div class="col">
+      <p>Input</p>
+      <div class="imgbox" id="preview">No image loaded</div>
     </div>
     <div class="col">
+      <p>Detections</p>
+      <div class="imgbox" id="result">Run detection to see results</div>
     </div>
   </div>
   <div id="summary">Upload an image and click Run Detection.</div>
+  <div class="legend">
+    <span class="dot" style="background:#dc3c3c"></span>Wall &nbsp;
+    <span class="dot" style="background:#32c850"></span>Room
+  </div>
 <script>
+  let file = null;
+  document.getElementById('fi').addEventListener('change', e => {
+    file = e.target.files[0];
+    if (!file) return;
+    document.getElementById('fname').textContent = file.name;
+    const r = new FileReader();
+    r.onload = ev => document.getElementById('preview').innerHTML = `<img src="${ev.target.result}">`;
+    r.readAsDataURL(file);
   });
+  async function detect() {
+    if (!file) { alert('Choose an image first.'); return; }
+    document.getElementById('result').innerHTML = '<span class="loading">Running… (30–60s on CPU)</span>';
     document.getElementById('summary').textContent = 'Processing…';
+    const fd = new FormData();
+    fd.append('image', file);
     try {
+      const r = await fetch('/detect', {method:'POST', body:fd});
+      const d = await r.json();
+      if (d.error) { document.getElementById('result').innerHTML = 'Error'; document.getElementById('summary').textContent = d.error; return; }
+      document.getElementById('result').innerHTML = `<img src="data:image/jpeg;base64,${d.image}">`;
+      const w = d.json.detectionResults.walls.length;
+      const rm = d.json.detectionResults.rooms.length;
+      let txt = `Detected: ${w} wall(s)  |  ${rm} room(s)  (conf threshold: 0.30)\n`;
+      txt += `Overall confidence: ${(d.json.confidence*100).toFixed(1)}%\n\n`;
+      d.json.detectionResults.walls.forEach(x => txt += `  • Wall   ${x.id}  conf=${x.confidence.toFixed(3)}\n`);
+      d.json.detectionResults.rooms.forEach(x => txt += `  • Room   ${x.id}  conf=${x.confidence.toFixed(3)}\n`);
+      document.getElementById('summary').textContent = txt;
+    } catch(e) {
+      document.getElementById('result').innerHTML = 'Error';
+      document.getElementById('summary').textContent = String(e);
     }
   }
 </script>
 </body>
+</html>"""
 @app.get("/", response_class=HTMLResponse)
 def index():  # landing page: returns the embedded single-page UI markup
     return HTML
 @app.post("/detect")
+async def detect(image: UploadFile = File(...)):
+    if image.content_type not in ["image/jpeg", "image/png"]:
+        raise HTTPException(status_code=400, detail="Only JPEG and PNG supported.")
+    contents = await image.read()
+    if len(contents) > MAX_FILE_SIZE:
+        raise HTTPException(status_code=400, detail="File exceeds 10 MB limit.")
+    nparr = np.frombuffer(contents, np.uint8)
+    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if img_bgr is None:
+        raise HTTPException(status_code=400, detail="Could not decode image.")
+    # Original run.py converts BGR→RGB before inference
+    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+    result = inference_detector(model, img_rgb)
+    # JSON output — matches original run.py exactly
+    processed = process_inference_result(result)
+    # Visual output — draw on RGB image, encode as JPEG
+    annotated_rgb = draw_detections(img_rgb, result)
+    annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)
+    _, buf = cv2.imencode(".jpg", annotated_bgr, [cv2.IMWRITE_JPEG_QUALITY, 90])
     b64 = base64.b64encode(buf).decode()
+    logger.info(f"Inference done: {len(processed['detectionResults']['walls'])} walls, "
+                f"{len(processed['detectionResults']['rooms'])} rooms")
+    return {"image": b64, "json": processed}
 if __name__ == "__main__":  # only when executed as a script, not when imported
     uvicorn.run(app, host="0.0.0.0", port=7860)  # serve on all interfaces, port 7860