# room_layout / app.py — Gradio Space by VanNguyen1214 (commit 75e661f)
import gradio as gr
import numpy as np
import os
import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from utils.logger import get_logger
from config.defaults import get_config
from inference import preprocess, run_one_inference
from models.build import build_model
from argparse import Namespace
def down_ckpt(model_cfg, ckpt_dir):
    """
    Download the checkpoint matching *model_cfg* from the Hugging Face Hub
    into ``ckpt_dir/best.pkl``.

    Models are organized in different folders on the HF Hub repository
    ``VanNguyen1214/LGT``. If the canonical filename is missing, a few
    alternative naming patterns are tried before giving up. Does nothing
    when the checkpoint already exists locally or the config is unknown.
    """
    # Fix: the import used to live inside the first try-block AFTER the
    # download call, so a failed primary download left `shutil` undefined
    # and the fallback loop crashed with NameError instead of retrying.
    import shutil

    # Mapping config files to their corresponding model files and folders on HF Hub.
    model_files = {
        'src/config/mp3d.yaml': 'checkpoint/mp3d/mp3d_best.pkl',
        'src/config/zind.yaml': 'checkpoint/zind/zind_best.pkl',
        'src/config/pano.yaml': 'checkpoint/pano/pano_best.pkl',
        'src/config/s2d3d.yaml': 'checkpoint/s2d3d/s2d3d_best.pkl',
        'src/config/ablation_study/full.yaml': 'checkpoint/ablation_full/full_best.pkl'
    }
    if model_cfg not in model_files:
        logger.warning(f"Model config {model_cfg} not found in available models")
        return

    model_path_on_hub = model_files[model_cfg]
    local_path = os.path.join(ckpt_dir, 'best.pkl')
    if os.path.exists(local_path):
        logger.info(f"Model already exists at {local_path}")
        return

    logger.info(f"Downloading model {model_path_on_hub} from Hugging Face Hub...")
    os.makedirs(ckpt_dir, exist_ok=True)
    try:
        # Download from the HF repository with its folder structure intact.
        downloaded_path = hf_hub_download(
            repo_id="VanNguyen1214/LGT",
            filename=model_path_on_hub,
            cache_dir=None,  # use default HF cache
            local_dir=None,  # don't specify local_dir so the cache is used
            local_dir_use_symlinks=False  # NOTE(review): deprecated in recent huggingface_hub — confirm pinned version
        )
        # Copy to the location the rest of the pipeline expects.
        shutil.copy2(downloaded_path, local_path)
        logger.info(f"Successfully downloaded and copied {model_path_on_hub}")
    except Exception as e:
        logger.error(f"Failed to download model {model_path_on_hub}: {str(e)}")
        logger.info("Falling back to default model if available...")
        # Try alternative naming patterns if the first attempt fails.
        alternative_patterns = [
            f"{model_path_on_hub.replace('/best.pkl', '_best.pkl')}",
            f"{model_path_on_hub.replace('/', '_')}",
            f"{model_path_on_hub.split('/')[-2]}_best.pkl" if '/' in model_path_on_hub else model_path_on_hub
        ]
        for alt_pattern in alternative_patterns:
            try:
                logger.info(f"Trying alternative pattern: {alt_pattern}")
                downloaded_path = hf_hub_download(
                    repo_id="VanNguyen1214/LGT",
                    filename=alt_pattern,
                    cache_dir=None,
                    local_dir=None,
                    local_dir_use_symlinks=False
                )
                shutil.copy2(downloaded_path, local_path)
                logger.info(f"Successfully downloaded with alternative pattern: {alt_pattern}")
                break
            except Exception as alt_e:
                logger.debug(f"Alternative pattern {alt_pattern} failed: {str(alt_e)}")
                continue
        else:
            # for/else: no alternative pattern succeeded either.
            logger.error("All download attempts failed")
def detect_doors_and_connections(layout1, layout2, threshold=0.3):
    """
    Find wall segments where two rooms could be joined by a door.

    Two rooms are considered connectable when an edge of one room's
    bounding box lies within *threshold* of the facing edge of the other,
    and the boxes overlap along that shared wall.

    Returns (connections, room1_bounds, room2_bounds); on missing or
    empty layout data returns ([], {}, {}).
    """
    import numpy as np

    # Bail out unless both layouts carry wall coordinate arrays.
    for candidate in (layout1, layout2):
        if "layoutWalls" not in candidate:
            return [], {}, {}
        if "x" not in candidate["layoutWalls"] or "z" not in candidate["layoutWalls"]:
            return [], {}, {}

    xs1 = np.array(layout1["layoutWalls"]["x"])
    zs1 = np.array(layout1["layoutWalls"]["z"])
    xs2 = np.array(layout2["layoutWalls"]["x"])
    zs2 = np.array(layout2["layoutWalls"]["z"])
    if min(len(xs1), len(zs1), len(xs2), len(zs2)) == 0:
        return [], {}, {}

    # Axis-aligned bounding box of each room's walls.
    room1_bounds = {'min_x': xs1.min(), 'max_x': xs1.max(),
                    'min_z': zs1.min(), 'max_z': zs1.max()}
    room2_bounds = {'min_x': xs2.min(), 'max_x': xs2.max(),
                    'min_z': zs2.min(), 'max_z': zs2.max()}

    connections = []
    # (type, room1 side, room2 side, room1 edge, room2 edge) for each of
    # the four ways the two bounding boxes can abut.
    candidates = (
        ('horizontal', 'right', 'left', room1_bounds['max_x'], room2_bounds['min_x']),
        ('horizontal', 'left', 'right', room1_bounds['min_x'], room2_bounds['max_x']),
        ('vertical', 'back', 'front', room1_bounds['max_z'], room2_bounds['min_z']),
        ('vertical', 'front', 'back', room1_bounds['min_z'], room2_bounds['max_z']),
    )
    for kind, side1, side2, edge1, edge2 in candidates:
        if abs(edge1 - edge2) >= threshold:
            continue
        # Overlap runs along z for left/right walls, along x for front/back.
        axis = 'z' if kind == 'horizontal' else 'x'
        lo = max(room1_bounds['min_' + axis], room2_bounds['min_' + axis])
        hi = min(room1_bounds['max_' + axis], room2_bounds['max_' + axis])
        if hi <= lo:
            continue
        midpoint = (lo + hi) / 2  # door centred on the shared span
        if kind == 'horizontal':
            connections.append({
                'type': kind,
                'room1_side': side1,
                'room2_side': side2,
                'door_z': midpoint,
                'door_x': edge1,  # on room1's touching wall
                'overlap': hi - lo
            })
        else:
            connections.append({
                'type': kind,
                'room1_side': side1,
                'room2_side': side2,
                'door_x': midpoint,
                'door_z': edge1,
                'overlap': hi - lo
            })
    return connections, room1_bounds, room2_bounds
def align_rooms_with_doors(layouts, door_width=0.8):
    """
    Align multiple room layouts into a shared coordinate frame using the
    door connections detected between them.

    layouts    : list of paths to per-room layout JSON files.
    door_width : unused at present; presumably the intended physical door
                 width in meters — TODO confirm.

    Returns (aligned_layouts, room_transforms): layout dicts (translated
    copies) and the {'offset_x', 'offset_z'} applied to each.
    """
    import numpy as np
    import json
    # Fewer than two inputs: nothing to align; returns the paths untouched.
    if len(layouts) < 2:
        return layouts, [{'offset_x': 0, 'offset_z': 0}] * len(layouts)
    # Load all layouts from disk, keeping only files with a valid wall section.
    room_layouts = []
    for layout_path in layouts:
        if os.path.exists(layout_path):
            try:
                with open(layout_path, 'r') as f:
                    layout_data = json.load(f)
                # Validate layout structure before accepting it.
                if 'layoutWalls' in layout_data and 'x' in layout_data['layoutWalls'] and 'z' in layout_data['layoutWalls']:
                    room_layouts.append(layout_data)
            except Exception as e:
                logger.warning(f"Could not load layout from {layout_path}: {e}")
                continue
    if len(room_layouts) < 2:
        # NOTE(review): on this path the first return element is either loaded
        # layout *dicts* or the raw *paths* (when nothing loaded) — callers
        # should verify which shape they receive.
        return room_layouts if room_layouts else layouts, [{'offset_x': 0, 'offset_z': 0}] * len(room_layouts if room_layouts else layouts)
    # The first room anchors the global frame (identity transform).
    aligned_layouts = [room_layouts[0]]
    room_transforms = [{'offset_x': 0, 'offset_z': 0}]
    # Greedily place each subsequent room against the best-matching placed room.
    for i in range(1, len(room_layouts)):
        current_room = room_layouts[i]
        best_connection = None
        best_transform = {'offset_x': 0, 'offset_z': 0}
        best_score = -1
        # Try to connect with any previously aligned room.
        for j, aligned_room in enumerate(aligned_layouts):
            # NOTE(review): aligned_layouts already stores *transformed* rooms
            # (see apply_transform below), so re-applying room_transforms[j]
            # here appears to double-shift every room after the first — verify.
            transformed_aligned = apply_transform(aligned_room, room_transforms[j])
            connections, room1_bounds, room2_bounds = detect_doors_and_connections(
                transformed_aligned, current_room
            )
            # Keep the connection with the largest shared-wall overlap so far.
            for conn in connections:
                if conn['overlap'] > best_score:
                    best_score = conn['overlap']
                    best_connection = conn
                    # Translation that snaps the facing walls together.
                    if conn['type'] == 'horizontal':
                        if conn['room1_side'] == 'right' and conn['room2_side'] == 'left':
                            # Align room2's left wall with room1's right wall.
                            offset_x = room1_bounds['max_x'] - room2_bounds['min_x']
                            offset_z = conn['door_z'] - conn['door_z']  # NOTE(review): always 0 — placeholder for real door alignment?
                        elif conn['room1_side'] == 'left' and conn['room2_side'] == 'right':
                            # Align room2's right wall with room1's left wall.
                            offset_x = room1_bounds['min_x'] - room2_bounds['max_x']
                            offset_z = conn['door_z'] - conn['door_z']  # NOTE(review): always 0
                        else:
                            offset_x = 0
                            offset_z = 0
                    else:  # vertical connection
                        if conn['room1_side'] == 'back' and conn['room2_side'] == 'front':
                            # Align room2's front wall with room1's back wall.
                            offset_x = conn['door_x'] - conn['door_x']  # NOTE(review): always 0
                            offset_z = room1_bounds['max_z'] - room2_bounds['min_z']
                        elif conn['room1_side'] == 'front' and conn['room2_side'] == 'back':
                            # Align room2's back wall with room1's front wall.
                            offset_x = conn['door_x'] - conn['door_x']  # NOTE(review): always 0
                            offset_z = room1_bounds['min_z'] - room2_bounds['max_z']
                        else:
                            offset_x = 0
                            offset_z = 0
                    best_transform = {'offset_x': offset_x, 'offset_z': offset_z}
        # Accept the connection only above a minimum-overlap threshold.
        if best_connection and best_score > 0.5:
            transformed_room = apply_transform(current_room, best_transform)
            aligned_layouts.append(transformed_room)
            room_transforms.append(best_transform)
        else:
            # No good connection found: place the room with a default offset.
            default_offset = len(aligned_layouts) * 5.0  # 5 meter spacing
            default_transform = {'offset_x': default_offset, 'offset_z': 0}
            transformed_room = apply_transform(current_room, default_transform)
            aligned_layouts.append(transformed_room)
            room_transforms.append(default_transform)
    return aligned_layouts, room_transforms
def apply_transform(layout, transform):
    """
    Return a deep copy of *layout* translated by transform['offset_x'] /
    transform['offset_z']. Only the x and z arrays of the wall, floor and
    ceiling sections are shifted; y values and every other key are untouched.
    The input layout is never mutated.
    """
    import copy
    shifted = copy.deepcopy(layout)
    offsets = {'x': transform['offset_x'], 'z': transform['offset_z']}
    for section_name in ('layoutWalls', 'layoutFloors', 'layoutCeilings'):
        section = shifted.get(section_name)
        if not isinstance(section, dict):
            continue
        for axis, delta in offsets.items():
            coords = section.get(axis)
            # Only shift well-formed coordinate lists.
            if isinstance(coords, list):
                section[axis] = [value + delta for value in coords]
    return shifted
def process_multi_images_simple(images, layouts):
    """
    Combine multiple room layouts side by side along the x axis.

    images  : unused; kept for interface symmetry with process_multi_images_smart.
    layouts : list of paths to per-room ``*_pred.json`` layout files.

    Returns a dict with merged coordinate arrays plus per-room bounds
    metadata under "rooms". Rooms are placed left to right with a 1 m gap.

    Robustness fix: a single missing, unreadable or malformed layout file
    used to crash the whole merge (unguarded json.load, raw key indexing,
    max() on empty arrays); such files are now skipped.
    """
    import json
    combined_layout = {
        "camera_height": 1.6,
        "layoutWalls": {"x": [], "y": [], "z": []},
        "layoutFloors": {"x": [], "y": [], "z": []},
        "layoutCeilings": {"x": [], "y": [], "z": []},
        "rooms": []  # Store individual room info
    }
    offset_x = 0
    for i, layout_path in enumerate(layouts):
        if not os.path.exists(layout_path):
            continue
        try:
            with open(layout_path, 'r') as f:
                layout = json.load(f)
            # Room extents; raises if the wall arrays are missing or empty.
            walls_x = layout["layoutWalls"]["x"]
            walls_z = layout["layoutWalls"]["z"]
            min_x, max_x = min(walls_x), max(walls_x)
            min_z, max_z = min(walls_z), max(walls_z)
        except (OSError, ValueError, KeyError, TypeError) as e:
            logger.warning(f"Skipping invalid layout file {layout_path}: {e}")
            continue
        room_width = max_x - min_x
        # Record this room's placement metadata.
        room_info = {
            "room_id": i,
            "bounds": {
                "min_x": min_x + offset_x,
                "max_x": max_x + offset_x,
                "min_z": min_z,
                "max_z": max_z
            },
            "offset_x": offset_x,
            "offset_z": 0
        }
        combined_layout["rooms"].append(room_info)
        # Shift x coordinates so rooms don't overlap; y and z pass through.
        for coord_type in ['layoutWalls', 'layoutFloors', 'layoutCeilings']:
            section = layout.get(coord_type)
            if not isinstance(section, dict):
                continue
            for x in section.get("x", []):
                combined_layout[coord_type]["x"].append(x + offset_x)
            for z in section.get("z", []):
                combined_layout[coord_type]["z"].append(z)
            for y in section.get("y", []):
                combined_layout[coord_type]["y"].append(y)
        # Advance the cursor: room width plus a 1 meter gap.
        offset_x += room_width + 1.0
    return combined_layout
def process_multi_images_smart(images, layouts):
    """
    Merge several room layouts into one floor plan, using door detection to
    align the rooms before combining their geometry.

    images  : unused; kept for interface symmetry with process_multi_images_simple.
    layouts : list of paths to per-room layout JSON files.

    Returns a combined layout dict with merged coordinate arrays plus
    "rooms" (per-room bounds and transform) and "doors" (detected
    connections between room pairs).
    """
    import json

    # Place every room relative to the ones already aligned.
    aligned_layouts, transforms = align_rooms_with_doors(layouts)

    combined_layout = {
        "camera_height": 1.6,
        "layoutWalls": {"x": [], "y": [], "z": []},
        "layoutFloors": {"x": [], "y": [], "z": []},
        "layoutCeilings": {"x": [], "y": [], "z": []},
        "rooms": [],  # per-room bounds + transform metadata
        "doors": []   # detected door connections between room pairs
    }

    # Fold each aligned room's metadata and geometry into the result.
    for room_id, layout in enumerate(aligned_layouts):
        xs = layout["layoutWalls"]["x"]
        zs = layout["layoutWalls"]["z"]
        if room_id < len(transforms):
            transform = transforms[room_id]
        else:
            transform = {"offset_x": 0, "offset_z": 0}
        combined_layout["rooms"].append({
            "room_id": room_id,
            "bounds": {
                "min_x": min(xs), "max_x": max(xs),
                "min_z": min(zs), "max_z": max(zs)
            },
            "transform": transform
        })
        for section in ('layoutWalls', 'layoutFloors', 'layoutCeilings'):
            if section in layout:
                for axis in ("x", "y", "z"):
                    combined_layout[section][axis].extend(layout[section][axis])

    # Record door connections for every unordered pair of rooms.
    for a in range(len(aligned_layouts)):
        for b in range(a + 1, len(aligned_layouts)):
            pair_connections, _, _ = detect_doors_and_connections(
                aligned_layouts[a], aligned_layouts[b]
            )
            for conn in pair_connections:
                combined_layout["doors"].append({
                    "room1_id": a,
                    "room2_id": b,
                    "door_x": conn.get('door_x', 0),
                    "door_z": conn.get('door_z', 0),
                    "connection_type": conn['type'],
                    "overlap": conn['overlap']
                })
    return combined_layout
def greet(img_paths, pre_processing, weight_name, post_processing, visualization, mesh_format, mesh_resolution, processing_mode="single"):
    """
    Main inference entry point driven by the Gradio UI.

    img_paths       : a single panorama path or a list of paths.
    pre_processing  : bool — run vanishing-point alignment first.
    weight_name     : 'mp3d' or 'zind' — selects the pre-loaded model.
    post_processing : layout optimisation method (stored on global args).
    visualization   : list of extra 2D outputs to render.
    mesh_format     : '.gltf' | '.obj' | '.glb'.
    mesh_resolution : mesh resolution as a string (converted to int).
    processing_mode : 'single' | 'multi_simple' | 'multi_smart'.
                      Fix: now defaults to "single" — gr.Examples supplies
                      only 7 inputs, so example clicks used to raise
                      TypeError for the missing 8th argument.

    Returns [2D preview path, 3D model path (viewer), 3D model path
    (download), vp file path, layout JSON path].

    Relies on module-level globals: args, logger, mp3d_model, zind_model.
    """
    import json
    # Persist the per-request options on the shared args namespace.
    args.pre_processing = pre_processing
    args.post_processing = post_processing
    if weight_name == 'mp3d':
        model = mp3d_model
    elif weight_name == 'zind':
        model = zind_model
    else:
        logger.error("unknown pre-trained weight name")
        raise NotImplementedError
    # Normalize the input to a list of paths.
    if isinstance(img_paths, str):
        img_paths = [img_paths]
    if processing_mode == "single" or len(img_paths) == 1:
        # --- Single room processing ---
        img_path = img_paths[0] if isinstance(img_paths, list) else img_paths
        img_name = os.path.basename(img_path).split('.')[0]
        # Resize to the 1024x512 equirectangular input size; drop alpha.
        img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3]
        vp_cache_path = 'src/demo/default_vp.txt'
        if args.pre_processing:
            vp_cache_path = os.path.join('src/output', f'{img_name}_vp.txt')
            logger.info("pre-processing ...")
            # Vanishing-point based alignment; vp data is cached to disk.
            img, vp = preprocess(img, vp_cache_path=vp_cache_path)
        img = (img / 255.0).astype(np.float32)
        run_one_inference(img, model, args, img_name,
                          logger=logger, show=False,
                          show_depth='depth-normal-gradient' in visualization,
                          show_floorplan='2d-floorplan' in visualization,
                          mesh_format=mesh_format, mesh_resolution=int(mesh_resolution))
        # The mesh path appears twice: once for the Model3D viewer, once for download.
        return [os.path.join(args.output_dir, f"{img_name}_pred.png"),
                os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
                os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
                vp_cache_path,
                os.path.join(args.output_dir, f"{img_name}_pred.json")]
    else:
        # --- Multi-room processing ---
        if processing_mode == "multi_simple":
            logger.info(f"Processing {len(img_paths)} rooms in simple multi-room mode...")
        else:  # multi_smart
            logger.info(f"Processing {len(img_paths)} rooms with smart door detection...")
        individual_layouts = []
        combined_images = []
        # Run single-room inference independently for every panorama.
        for i, img_path in enumerate(img_paths):
            img_name = f"room_{i}_{os.path.basename(img_path).split('.')[0]}"
            img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3]
            vp_cache_path = 'src/demo/default_vp.txt'
            if args.pre_processing:
                vp_cache_path = os.path.join('src/output', f'{img_name}_vp.txt')
                logger.info(f"pre-processing room {i+1}...")
                img, vp = preprocess(img, vp_cache_path=vp_cache_path)
            img = (img / 255.0).astype(np.float32)
            run_one_inference(img, model, args, img_name,
                              logger=logger, show=False,
                              show_depth='depth-normal-gradient' in visualization,
                              show_floorplan='2d-floorplan' in visualization,
                              mesh_format=mesh_format, mesh_resolution=int(mesh_resolution))
            individual_layouts.append(os.path.join(args.output_dir, f"{img_name}_pred.json"))
            combined_images.append(os.path.join(args.output_dir, f"{img_name}_pred.png"))
        # Combine layouts based on processing mode.
        if processing_mode == "multi_simple":
            logger.info("Combining rooms with simple side-by-side layout...")
            combined_layout = process_multi_images_simple(img_paths, individual_layouts)
            combined_name = "multi_room_simple"
        else:  # multi_smart
            logger.info("Combining rooms with smart door detection...")
            combined_layout = process_multi_images_smart(img_paths, individual_layouts)
            combined_name = "multi_room_smart"
        # Save the merged layout JSON.
        combined_json_path = os.path.join(args.output_dir, f"{combined_name}_pred.json")
        with open(combined_json_path, 'w') as f:
            json.dump(combined_layout, f, indent=4)
        # Create a combined 2D visualization by horizontally concatenating
        # the per-room previews.
        # Fix: combined_img_path was left unbound when combined_images was
        # non-empty but no preview file existed on disk (NameError below).
        combined_img_path = None
        if len(combined_images) > 0:
            imgs = [np.array(Image.open(img_path)) for img_path in combined_images if os.path.exists(img_path)]
            if imgs:
                combined_img = np.concatenate(imgs, axis=1)
                combined_img_path = os.path.join(args.output_dir, f"{combined_name}_pred.png")
                Image.fromarray(combined_img).save(combined_img_path)
        # Generate 3D mesh for the combined layout.
        try:
            logger.info("Creating combined 3D mesh...")
            combined_mesh_path = os.path.join(args.output_dir, f"{combined_name}_3d{mesh_format}")
            # For now, use the first room's mesh as a placeholder.
            if individual_layouts:
                first_room_mesh = os.path.join(args.output_dir, f"room_0_{os.path.basename(img_paths[0]).split('.')[0]}_3d{mesh_format}")
                if os.path.exists(first_room_mesh):
                    import shutil
                    shutil.copy2(first_room_mesh, combined_mesh_path)
        except Exception as e:
            logger.warning(f"Could not create combined 3D mesh: {e}")
            # NOTE(review): falls back to a JSON path, not a mesh — verify intent.
            combined_mesh_path = individual_layouts[0] if individual_layouts else None
        return [(combined_img_path or combined_images[0]) if combined_images else None,
                combined_mesh_path,
                combined_mesh_path,
                'src/demo/default_vp.txt',
                combined_json_path]
def get_model(args):
    """
    Build the layout model for args.cfg: resolve the config, fetch the
    checkpoint from the Hub, and fall back to CPU when CUDA was requested
    but is unavailable. Returns only the model from build_model.
    """
    config = get_config(args)
    down_ckpt(args.cfg, config.CKPT.DIR)
    # Either the CLI args or the config may ask for CUDA.
    wants_cuda = 'cuda' in args.device or 'cuda' in config.TRAIN.DEVICE
    if wants_cuda and not torch.cuda.is_available():
        logger.info(f'The {args.device} is not available, will use cpu...')
        config.defrost()
        args.device = "cpu"
        config.TRAIN.DEVICE = "cpu"
        config.freeze()
    model, _, _, _ = build_model(config, logger)
    return model
if __name__ == '__main__':
    logger = get_logger()
    # Shared inference settings; greet() mutates this namespace per request.
    args = Namespace(device='cuda', output_dir='src/output', visualize_3d=False, output_3d=True)
    os.makedirs(args.output_dir, exist_ok=True)
    # Pre-load both sets of weights so the UI can switch models instantly.
    args.cfg = 'src/config/mp3d.yaml'
    mp3d_model = get_model(args)
    args.cfg = 'src/config/zind.yaml'
    zind_model = get_model(args)
    # Page header markdown (Vietnamese UI text).
    description = """
# 🏠 Layout Estimate Room - Multi-Room Edition
**Ước lượng layout phòng 3D từ ảnh panorama với khả năng ghép nhiều phòng thông qua cửa**
Ứng dụng này sử dụng công nghệ AI tiên tiến để phân tích ảnh toàn cảnh 360° và tạo ra mô hình 3D chi tiết của phòng.
**🆕 Tính năng mới**: Tự động phát hiện cửa và ghép nhiều phòng thành bản đồ hoàn chỉnh!
### ✨ Tính năng nổi bật:
- 🎯 Phân tích layout phòng tự động từ ảnh panorama
- 🚪 **Phát hiện cửa thông minh** và ghép các phòng liền kề
- 🏠 **Tạo bản đồ toàn bộ ngôi nhà** từ nhiều ảnh panorama
- 🎨 Tạo mô hình 3D chất lượng cao cho từng phòng và toàn bộ không gian
- 📊 Hiển thị kết quả trực quan với thông tin chi tiết về cửa và kết nối
- 💾 Xuất file 3D và JSON với dữ liệu layout hoàn chỉnh
---
"""
    # Enhanced CSS without Light/Dark mode
    custom_css = """
.gradio-container {
max-width: 1800px !important;
margin: auto !important;
transition: all 0.3s ease;
}
/* Enhanced Components */
.panorama-viewer {
width: 100% !important;
height: 450px !important;
border-radius: 15px;
margin: 20px 0;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.15);
overflow: hidden;
transition: all 0.3s ease;
}
.panorama-viewer:hover {
transform: translateY(-2px);
box-shadow: 0 12px 40px rgba(0, 0, 0, 0.2);
}
/* Improved Cards */
.info-card {
backdrop-filter: blur(10px);
border-radius: 15px;
padding: 20px;
margin: 15px 0;
transition: all 0.3s ease;
}
.info-card:hover {
transform: translateY(-2px);
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15);
}
/* Button Enhancements */
.gr-button {
border: none !important;
border-radius: 25px !important;
padding: 12px 24px !important;
font-weight: 600 !important;
transition: all 0.3s ease !important;
}
.gr-button:hover {
transform: translateY(-2px) !important;
}
/* Responsive Design */
@media (max-width: 768px) {
.gradio-container {
max-width: 100% !important;
padding: 10px !important;
}
.panorama-viewer {
height: 300px !important;
}
}
/* Animation Classes */
.fade-in {
animation: fadeIn 0.5s ease-in;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
.slide-in {
animation: slideIn 0.3s ease-out;
}
@keyframes slideIn {
from { transform: translateX(-20px); opacity: 0; }
to { transform: translateX(0); opacity: 1; }
}
"""
    # Pannellum HTML template
    # NOTE(review): this template (with its {image_path} placeholder) is never
    # referenced below — update_panorama_viewer builds its own iframe instead.
    # Presumably dead code kept from an earlier iteration; confirm before removing.
    pannellum_html = """
<!DOCTYPE HTML>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Panorama Viewer</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.js"></script>
<style>
#panorama {
width: 100%;
height: 400px;
border-radius: 10px;
}
body {
margin: 0;
padding: 10px;
font-family: Arial, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
.controls {
text-align: center;
margin: 10px 0;
color: white;
}
.info {
background: rgba(255,255,255,0.9);
padding: 10px;
border-radius: 5px;
margin: 10px 0;
font-size: 14px;
}
</style>
</head>
<body>
<div class="info">
<strong>🌐 Panorama 360° Viewer</strong><br>
Sử dụng chuột để xoay, scroll để zoom. Ảnh này sẽ được phân tích để tạo layout 3D.
</div>
<div id="panorama"></div>
<div class="controls">
<p>📱 Di chuyển chuột để khám phá không gian 360°</p>
</div>
<script>
pannellum.viewer('panorama', {
"type": "equirectangular",
"panorama": "{image_path}",
"autoLoad": true,
"autoRotate": -2,
"compass": true,
"northOffset": 0,
"showZoomCtrl": true,
"showFullscreenCtrl": true,
"showControls": true,
"hotSpotDebug": false,
"backgroundColor": [0, 0, 0],
"minHfov": 50,
"maxHfov": 120,
"hfov": 100
});
</script>
</body>
</html>
"""
    # --- UI definition ---
    with gr.Blocks(css=custom_css, title="Layout Estimate Room", theme=gr.themes.Soft()) as demo:
        gr.Markdown(description)
        with gr.Row():
            # Left column: inputs and settings.
            with gr.Column(scale=1):
                gr.Markdown("## 📤 Đầu vào")
                # Processing mode selection
                processing_mode = gr.Radio(
                    choices=[
                        "🏠 Phòng đơn",
                        "🏢 Phòng rộng",
                        "🚪 Ghép phòng thông minh"
                    ],
                    label="Chế độ xử lý",
                    value="🏠 Phòng đơn",
                    info="Chọn cách thức phân tích layout"
                )
                # Single image input (default)
                single_image = gr.Image(
                    type='filepath',
                    label='🖼️ Tải lên ảnh panorama của bạn',
                    value='src/demo/pano_demo1.png',
                    height=300,
                    visible=True
                )
                # Multiple images input (hidden by default)
                multi_images = gr.File(
                    label='📁 Tải lên nhiều ảnh panorama (2-5 ảnh)',
                    file_count="multiple",
                    file_types=["image"],
                    visible=False
                )
                # Guide for single room
                single_guide = gr.Markdown("""
**💡 Hướng dẫn phòng đơn:**
- 📷 Tải lên 1 ảnh panorama 360°
- 🎯 Phân tích layout của một phòng riêng lẻ
- ⚡ Xử lý nhanh và chính xác
""", visible=True, elem_id="single_guide")
                # Guide for multi-room
                multi_guide = gr.Markdown("""
**💡 Hướng dẫn phòng rộng:**
- 📁 Tải lên 2-5 ảnh panorama từ các vị trí khác nhau
- 🔗 Ghép các phòng theo thứ tự liền kề đơn giản
- 📐 Sắp xếp các phòng cạnh nhau với khoảng cách cố định
- 🎯 Phù hợp cho không gian mở hoặc các phòng độc lập
""", visible=False, elem_id="multi_guide")
                # Guide for smart connection
                smart_guide = gr.Markdown("""
**💡 Hướng dẫn ghép phòng thông minh:**
- 🏠 Chụp ảnh panorama từ các phòng có cửa nối với nhau
- 🚪 AI tự động phát hiện cửa và căn chỉnh vị trí chính xác
- 🧠 Phân tích hình học để ghép các phòng theo cấu trúc thực tế
- 🗺️ Tạo bản đồ hoàn chỉnh của toàn bộ ngôi nhà/văn phòng
- 📊 Kết quả bao gồm thông tin chi tiết về cửa và kết nối
- 🎯 Tối đa 5 phòng để có độ chính xác cao nhất
""", visible=False, elem_id="smart_guide")
                with gr.Accordion("⚙️ Cài đặt nâng cao", open=False):
                    preprocessing = gr.Checkbox(
                        label='🔧 Tiền xử lý ảnh (khuyến nghị)',
                        value=True,
                        info="Tự động căn chỉnh ảnh để có kết quả tốt nhất"
                    )
                    model_weight = gr.Radio(
                        ['mp3d', 'zind'],
                        label='🧠 Mô hình AI',
                        value='mp3d',
                        info="MP3D: Tốt cho phòng thông thường | ZInD: Tốt cho không gian phức tạp"
                    )
                    postprocessing = gr.Radio(
                        ['manhattan', 'atalanta', 'original'],
                        label='🎯 Phương pháp tối ưu',
                        value='manhattan',
                        info="Manhattan: Phòng vuông góc | Atalanta: Phòng không vuông góc"
                    )
                with gr.Accordion("🎨 Tùy chọn hiển thị", open=False):
                    visualization = gr.CheckboxGroup(
                        ['depth-normal-gradient', '2d-floorplan'],
                        label='📊 Hiển thị kết quả 2D',
                        value=['depth-normal-gradient', '2d-floorplan'],
                        info="Chọn các loại hình ảnh phân tích muốn xem"
                    )
                    mesh_format = gr.Radio(
                        ['.gltf', '.obj', '.glb'],
                        label='📁 Định dạng file 3D',
                        value='.gltf',
                        info="GLTF: Tốt nhất cho web | OBJ: Tương thích rộng | GLB: Nhỏ gọn"
                    )
                    mesh_resolution = gr.Radio(
                        ['128', '256', '512', '1024', '2048'],
                        label='🔍 Độ phân giải 3D',
                        value='512',
                        info="Cao hơn = Chi tiết hơn nhưng file lớn hơn và xử lý lâu hơn"
                    )
                process_btn = gr.Button(
                    "🚀 Phân tích Layout Phòng",
                    variant="primary",
                    size="lg"
                )
            # Right column: results.
            with gr.Column(scale=1):
                gr.Markdown("## 📥 Kết quả")
                result_image = gr.Image(
                    label='🎨 Kết quả phân tích 2D',
                    type='filepath',
                    height=300
                )
                model_3d = gr.Model3D(
                    label='🏗️ Mô hình 3D phòng',
                    clear_color=[0.9, 0.9, 0.9, 1.0],
                    height=400
                )
                # Panorama 360° viewer positioned below 3D model
                panorama_viewer = gr.HTML(
                    label="🌐 Xem Panorama 360°",
                    value="""
<div style="text-align: center; padding: 50px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
<h3>🌐 Panorama 360° Viewer</h3>
<p>Tải lên ảnh panorama để xem trước 360°</p>
<p><small>Hỗ trợ định dạng: JPG, PNG, JPEG</small></p>
</div>
""",
                    visible=True
                )
        # Download row shared by all modes.
        with gr.Row():
            mesh_file = gr.File(label='💾 Tải xuống file 3D')
            vp_file = gr.File(label='📐 Thông tin vanishing point')
            json_file = gr.File(label='📋 Dữ liệu layout JSON')
        # Examples section
        gr.Markdown("## 🎯 Ví dụ mẫu")
        # NOTE(review): greet takes processing_mode as an 8th parameter; these
        # examples supply only 7 inputs — this only works if processing_mode
        # has a default value. Verify greet's signature.
        gr.Examples(
            examples=[
                ['src/demo/pano_demo1.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '512'],
                ['src/demo/mp3d_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '512'],
                ['src/demo/mp3d_demo2.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '1024'],
                ['src/demo/zind_demo1.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '512'],
                ['src/demo/zind_demo2.png', False, 'zind', 'atalanta', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '1024'],
            ],
            inputs=[single_image, preprocessing, model_weight, postprocessing, visualization, mesh_format, mesh_resolution],
            outputs=[result_image, model_3d, mesh_file, vp_file, json_file],
            fn=greet,
            cache_examples=False
        )
        # Function to handle mode switching: toggles which input widgets and
        # guide texts are visible for the selected processing mode.
        def switch_mode(mode):
            if mode == "🏠 Phòng đơn":
                return (
                    gr.update(visible=True),   # single_image
                    gr.update(visible=True),   # panorama_viewer
                    gr.update(visible=False),  # multi_images
                    gr.update(visible=True),   # single_guide
                    gr.update(visible=False),  # multi_guide
                    gr.update(visible=False)   # smart_guide
                )
            elif mode == "🏢 Phòng rộng":
                return (
                    gr.update(visible=False),  # single_image
                    gr.update(visible=False),  # panorama_viewer
                    gr.update(visible=True),   # multi_images
                    gr.update(visible=False),  # single_guide
                    gr.update(visible=True),   # multi_guide
                    gr.update(visible=False)   # smart_guide
                )
            else:  # "🚪 Ghép phòng thông minh"
                return (
                    gr.update(visible=False),  # single_image
                    gr.update(visible=False),  # panorama_viewer
                    gr.update(visible=True),   # multi_images
                    gr.update(visible=False),  # single_guide
                    gr.update(visible=False),  # multi_guide
                    gr.update(visible=True)    # smart_guide
                )
        # Event handler for mode switching
        processing_mode.change(
            fn=switch_mode,
            inputs=[processing_mode],
            outputs=[single_image, panorama_viewer, multi_images, single_guide, multi_guide, smart_guide]
        )
        # Function to handle processing based on the selected mode. Maps the
        # UI radio label to greet()'s processing_mode string and extracts
        # file paths from the gradio file objects.
        def process_images(mode, single_img, multi_imgs, preprocessing, weight_name, postprocessing, visualization, mesh_format, mesh_resolution):
            if mode == "🏠 Phòng đơn":
                # Single room mode
                return greet(single_img, preprocessing, weight_name, postprocessing, visualization, mesh_format, mesh_resolution, "single")
            elif mode == "🏢 Phòng rộng":
                # Multi-room simple mode
                if multi_imgs:
                    img_paths = [img.name for img in multi_imgs] if isinstance(multi_imgs, list) else [multi_imgs.name]
                    return greet(img_paths, preprocessing, weight_name, postprocessing, visualization, mesh_format, mesh_resolution, "multi_simple")
                else:
                    return [None, None, None, None, None]
            else:  # "🚪 Ghép phòng thông minh"
                # Smart room connection mode
                if multi_imgs:
                    img_paths = [img.name for img in multi_imgs] if isinstance(multi_imgs, list) else [multi_imgs.name]
                    return greet(img_paths, preprocessing, weight_name, postprocessing, visualization, mesh_format, mesh_resolution, "multi_smart")
                else:
                    return [None, None, None, None, None]
        # Function to update the panorama viewer. Embeds the uploaded image as
        # a base64 data URL inside a Pannellum CDN iframe (avoids serving the
        # local file to the iframe directly).
        def update_panorama_viewer(image_path):
            if image_path is None:
                # Placeholder card shown before any image is uploaded.
                return """
<div style="text-align: center; padding: 50px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
<h3>🌐 Panorama 360° Viewer</h3>
<p>Tải lên ảnh panorama để xem trước 360°</p>
<p><small>Hỗ trợ định dạng: JPG, PNG, JPEG</small></p>
</div>
"""
            try:
                import base64
                import os
                # Create a base64 data URL for the image.
                with open(image_path, "rb") as img_file:
                    img_data = base64.b64encode(img_file.read()).decode()
                # Determine the MIME type from the file extension.
                file_ext = os.path.splitext(image_path)[1].lower()
                if file_ext in ['.jpg', '.jpeg']:
                    mime_type = 'image/jpeg'
                elif file_ext == '.png':
                    mime_type = 'image/png'
                else:
                    mime_type = 'image/jpeg'
                data_url = f"data:{mime_type};base64,{img_data}"
                # Create iframe with Pannellum CDN (fixed parameters).
                iframe_html = f"""
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 15px; margin: 10px 0;">
<div style="background: rgba(255,255,255,0.95); color: #333; padding: 15px; border-radius: 8px; margin-bottom: 15px; font-size: 14px;">
<strong>🌐 Panorama 360° của bạn</strong><br>
Khám phá không gian trước khi phân tích layout. Sử dụng chuột để xoay, scroll để zoom.
</div>
<iframe
width="100%"
height="400"
allowfullscreen
style="border: none; border-radius: 10px; box-shadow: 0 4px 20px rgba(0,0,0,0.3);"
src="https://cdn.pannellum.org/2.5/pannellum.htm#panorama={data_url}&autoLoad=true&autoRotate=-2">
</iframe>
<div style="text-align: center; margin-top: 15px; color: white;">
<p style="margin: 5px 0;">💡 <strong>Mẹo sử dụng:</strong></p>
<p style="margin: 5px 0; font-size: 13px;">• Di chuyển chuột để xoay 360° • Scroll để zoom • Nhấn fullscreen để xem toàn màn hình</p>
</div>
</div>
"""
                return iframe_html
            except Exception as e:
                logger.warning(f"Could not create panorama viewer: {e}")
                # Error card with the exception message for quick diagnosis.
                return f"""
<div style="text-align: center; padding: 50px; background: linear-gradient(135deg, #ff6b6b 0%, #ee5a24 100%); border-radius: 10px; color: white;">
<h3>⚠️ Lỗi Panorama Viewer</h3>
<p>Không thể tải ảnh panorama. Vui lòng kiểm tra định dạng ảnh.</p>
<p><small>Lỗi: {str(e)}</small></p>
<p><small>Hỗ trợ: JPG, PNG, JPEG</small></p>
</div>
"""
        # Event handler for updating the panorama viewer when the image changes.
        single_image.change(
            fn=update_panorama_viewer,
            inputs=[single_image],
            outputs=[panorama_viewer]
        )
        # Initialize the panorama viewer with the default image on app load.
        demo.load(
            fn=update_panorama_viewer,
            inputs=[single_image],
            outputs=[panorama_viewer]
        )
        # Main event handler: run inference on button click.
        process_btn.click(
            fn=process_images,
            inputs=[processing_mode, single_image, multi_images, preprocessing, model_weight, postprocessing, visualization, mesh_format, mesh_resolution],
            outputs=[result_image, model_3d, mesh_file, vp_file, json_file]
        )
        # Footer
        gr.Markdown("""
---
### 📞 Hỗ trợ & Thông tin
- 🔧 **Công nghệ**: Sử dụng mạng Transformer tiên tiến cho phân tích hình học 3D
- 📊 **Độ chính xác**: 3D IoU ~81%, 2D IoU ~83%
- 💡 **Mẹo**: Sử dụng ảnh panorama chất lượng cao để có kết quả tốt nhất
- 🎯 **Ứng dụng**: Thiết kế nội thất, Bất động sản, AR/VR, Robotics
""")
    demo.launch()