Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import cv2 | |
| from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation | |
| from PIL import Image | |
| import json | |
| from scipy import ndimage | |
| from scipy.ndimage import binary_opening, binary_closing, sobel, binary_dilation, median_filter | |
| import zipfile | |
| import io | |
| import os | |
| import tempfile | |
| import shutil | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
# Global state
# Torch device string used when moving the model and its inputs.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Populated lazily by load_model(); both stay None until the first call.
processor = None
model = None
# Category definitions
# Keyword -> CITY_CATEGORIES key. map_ade20k_to_city() walks this dict in
# insertion order and returns on the first keyword that matches, so the order
# of the entries is significant.
ADE20K_TO_CITY_MAPPING = {
    'road': 'road', 'street': 'road', 'path': 'road', 'sidewalk': 'road',
    'building': 'building_c', 'house': 'building_a', 'skyscraper': 'building_e',
    'highrise': 'building_e', 'tower': 'building_e',
    'office': 'building_d', 'shop': 'building_b', 'store': 'building_b',
    'apartment': 'building_c', 'hotel': 'building_d',
    'tree': 'forest', 'plant': 'forest', 'palm': 'forest',
    'grass': 'park', 'field': 'park', 'flower': 'park',
    'water': 'water', 'sea': 'water', 'river': 'water', 'lake': 'water',
    'earth': 'bare_land', 'sand': 'bare_land', 'ground': 'bare_land',
    'parking lot': 'infrastructure', 'stadium': 'building_d',
}
# Output categories. Per entry:
#   label       - display name (used in the statistics text and the JSON output)
#   color       - RGB triple painted by create_colored_segmentation()
#   height      - extrusion height; create_3d_mesh() multiplies it by 0.5
#   semantic_id - value written into the city segmentation map by process_image()
CITY_CATEGORIES = {
    'road': {'label': '道路', 'color': (128, 64, 128), 'height': 0, 'semantic_id': 0},
    'forest': {'label': '森林', 'color': (34, 139, 34), 'height': 1.5, 'semantic_id': 1},
    'park': {'label': '公園/緑地', 'color': (144, 238, 144), 'height': 0.5, 'semantic_id': 2},
    'water': {'label': '水域', 'color': (30, 144, 255), 'height': 0, 'semantic_id': 3},
    'building_a': {'label': '建物A(小)', 'color': (255, 200, 150), 'height': 0.6, 'semantic_id': 4},
    'building_b': {'label': '建物B(中小)', 'color': (255, 160, 122), 'height': 1.0, 'semantic_id': 5},
    'building_c': {'label': '建物C(中)', 'color': (240, 120, 90), 'height': 1.5, 'semantic_id': 6},
    'building_d': {'label': '建物D(中大)', 'color': (220, 80, 60), 'height': 2.2, 'semantic_id': 7},
    'building_e': {'label': '建物E(大)', 'color': (200, 40, 40), 'height': 3.0, 'semantic_id': 8},
    'bare_land': {'label': '空き地', 'color': (210, 180, 140), 'height': 0.1, 'semantic_id': 9},
    'infrastructure': {'label': 'インフラ', 'color': (100, 100, 100), 'height': 0.8, 'semantic_id': 10},
    # 'other' is also the id detect_boundaries() paints over class boundaries.
    'other': {'label': 'その他/境界', 'color': (80, 80, 80), 'height': 0, 'semantic_id': 11}
}
def load_model():
    """Return the shared (processor, model) pair, loading it on first use.

    Both module-level globals are (re)loaded whenever either one is still
    None; afterwards the cached objects are returned unchanged.  The model is
    moved to the module-level ``device``.
    """
    global processor, model
    already_loaded = processor is not None and model is not None
    if not already_loaded:
        checkpoint = "nvidia/segformer-b5-finetuned-ade-640-640"
        processor = SegformerImageProcessor.from_pretrained(checkpoint)
        segformer = SegformerForSemanticSegmentation.from_pretrained(checkpoint)
        model = segformer.to(device)
    return processor, model
def map_ade20k_to_city(class_id, id2label):
    """Translate a segmentation class id into one of the CITY_CATEGORIES keys.

    The label text for ``class_id`` is lower-cased and compared first against
    the keywords of ADE20K_TO_CITY_MAPPING (in dict order, first hit wins) and
    then against a short list of extra building keywords.

    Keywords must match whole words (or whole multi-word phrases such as
    'parking lot').  A plain substring test is wrong here: 'sea' is a
    substring of "seat", 'street' of "streetlight" and 'flower' of
    "flowerpot", which would classify furniture and lamps as water or road.

    Args:
        class_id: Key to look up in ``id2label``.
        id2label: Mapping from class id to label text.  A label may contain
            several synonyms separated by commas or semicolons.

    Returns:
        A CITY_CATEGORIES key; 'other' when ``class_id`` is not in
        ``id2label`` or when no keyword matches.
    """
    if class_id not in id2label:
        return 'other'

    # Normalise the label to single-space separated words, padded with a
    # space on both ends, so " keyword " only matches on word boundaries.
    words = id2label[class_id].lower().replace(',', ' ').replace(';', ' ').split()
    padded_label = f" {' '.join(words)} "

    def mentions(keyword):
        return f" {keyword} " in padded_label

    for ade_name, city_cat in ADE20K_TO_CITY_MAPPING.items():
        if mentions(ade_name):
            return city_cat

    # Secondary keywords, checked from the tallest building class downwards.
    fallback_keywords = (
        (('skyscraper', 'highrise', 'tower'), 'building_e'),
        (('office', 'hotel', 'commercial', 'stadium'), 'building_d'),
        (('building', 'apartment'), 'building_c'),
        (('shop', 'store', 'market'), 'building_b'),
        (('house', 'home', 'shed', 'hut'), 'building_a'),
    )
    for keywords, city_cat in fallback_keywords:
        if any(mentions(word) for word in keywords):
            return city_cat
    return 'other'
def segment_with_tiling(image, processor, model, tile_size=320, overlap=64, use_tiling=True):
    """Predict a per-pixel class-index map, optionally tile by tile.

    Without tiling (or when the image already fits inside one tile) the whole
    image goes through the model once.  Otherwise the image is cut into
    overlapping tiles, each tile's softmax probabilities are summed into a
    per-pixel accumulator, and the class with the largest accumulated
    probability is returned.

    Args:
        image: Image array of shape (H, W, ...) that PIL.Image.fromarray accepts.
        processor: Callable image processor; its result must support
            ``.to(device)`` and ``**`` unpacking into the model.
        model: Callable segmentation model whose output has a ``.logits``
            tensor shaped (batch, classes, h, w).
        tile_size: Tile edge length in pixels.
        overlap: Pixels shared by neighbouring tiles.  Values that leave no
            positive stride (overlap >= tile_size) fall back to a 50 % overlap
            instead of dividing by zero or producing no tiles.
        use_tiling: When False the image is always processed in one pass.

    Returns:
        Integer numpy array of shape (H, W) holding class indices.

    Raises:
        ValueError: If tiling is used and ``tile_size`` is smaller than 1.
    """
    h, w = image.shape[:2]

    if not use_tiling or (h <= tile_size and w <= tile_size):
        logits = _upsampled_logits(image, processor, model)
        return logits.argmax(dim=1)[0].cpu().numpy()

    tile_size = int(tile_size)
    overlap = max(int(overlap), 0)
    if tile_size < 1:
        raise ValueError(f"tile_size must be positive, got {tile_size}")

    stride = tile_size - overlap
    if stride <= 0:
        # The UI allows tile_size=120 with overlap up to 128; keep going with
        # half-overlapping tiles rather than failing.
        stride = max(tile_size // 2, 1)
        overlap = tile_size - stride

    # range(0, dim - overlap, stride) yields ceil((dim - overlap) / stride)
    # start positions, which is enough to reach the far edge; max(..., 1)
    # guarantees one tile even when a side is not longer than the overlap.
    y_starts = range(0, max(h - overlap, 1), stride)
    x_starts = range(0, max(w - overlap, 1), stride)

    votes = None
    for y_start in y_starts:
        y_end = min(y_start + tile_size, h)
        for x_start in x_starts:
            x_end = min(x_start + tile_size, w)
            tile = image[y_start:y_end, x_start:x_end]

            logits = _upsampled_logits(tile, processor, model)
            probs = torch.nn.functional.softmax(logits, dim=1)[0].cpu().numpy()

            if votes is None:
                # Size the accumulator from the model output instead of
                # assuming a fixed number of classes.
                votes = np.zeros((h, w, probs.shape[0]), dtype=np.float32)
            votes[y_start:y_end, x_start:x_end] += probs.transpose(1, 2, 0)

    # Dividing by the per-pixel tile count would scale all classes of a pixel
    # by the same positive factor, so the argmax of the raw sums is the same.
    return votes.argmax(axis=2)


def _upsampled_logits(tile, processor, model):
    """Run the model on one image/tile and resize the logits to its height and width."""
    inputs = processor(images=Image.fromarray(tile), return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.nn.functional.interpolate(
        logits, size=tile.shape[:2], mode="bilinear", align_corners=False
    )
def create_colored_segmentation(seg_map):
    """Paint a 2-D map of semantic ids with the colors from CITY_CATEGORIES.

    Returns a uint8 array of shape (H, W, 3); pixels whose id matches no
    category stay black.
    """
    rows, cols = seg_map.shape
    canvas = np.zeros((rows, cols, 3), dtype=np.uint8)
    for info in CITY_CATEGORIES.values():
        canvas[seg_map == info['semantic_id']] = info['color']
    return canvas
def detect_boundaries(segmentation_map, thickness=5):
    """Mark class boundaries in a segmentation map as the 'other' category.

    A pixel counts as boundary when the Sobel response of the id map is
    non-zero along either axis; for ``thickness > 1`` the boundary mask is
    dilated with a ``thickness`` x ``thickness`` square.

    Note: ``segmentation_map`` is modified in place and also returned.

    Returns:
        (segmentation_map, boundaries) where ``boundaries`` is the boolean mask
        of the pixels that were overwritten.
    """
    as_float = segmentation_map.astype(float)
    responses = [np.abs(sobel(as_float, axis=axis)) > 0 for axis in (0, 1)]
    boundaries = np.logical_or(responses[0], responses[1])

    if thickness > 1:
        square = np.ones((thickness, thickness), dtype=bool)
        boundaries = binary_dilation(boundaries, structure=square)

    segmentation_map[boundaries] = CITY_CATEGORIES['other']['semantic_id']
    return segmentation_map, boundaries
def create_3d_mesh(segments, image, resolution=2):
    """Build one JSON-serialisable mesh record per segment.

    Each segment's mask is sampled on a grid with ``resolution`` pixels
    between samples.  Every sample that lies inside the mask contributes
    eight vertices (four at the segment's roof height, four at height 0) and
    two triangles covering the roof quad.  Pixel coordinates are centred on
    the image and scaled by 0.1.

    Args:
        segments: Iterable of dicts with the keys 'segmentation' (full-image
            boolean mask), 'bbox' ([x, y, w, h]), 'height', 'category',
            'label', 'semantic_id' and 'area'.
        image: Image array of shape (H, W, 3) the roof colors are read from.
        resolution: Grid step in pixels.

    Returns:
        List of mesh dicts; segments with a bbox side under 3 pixels, an empty
        mask crop, or no sampled cell are left out.
    """
    img_h, img_w = image.shape[:2]
    cell = resolution
    records = []

    for seg_index, seg in enumerate(segments):
        mask = seg['segmentation']
        left, top, box_w, box_h = seg['bbox']
        if box_w < 3 or box_h < 3:
            continue

        mask_crop = mask[top:top+box_h, left:left+box_w]
        pixel_crop = image[top:top+box_h, left:left+box_w]
        if not mask_crop.any():
            continue

        roof_y = seg['height'] * 0.5
        vertices, faces, colors = [], [], []

        for row in range(0, mask_crop.shape[0] - cell, cell):
            for col in range(0, mask_crop.shape[1] - cell, cell):
                if not mask_crop[row, col]:
                    continue

                x0 = (left + col - img_w/2) * 0.1
                z0 = (top + row - img_h/2) * 0.1
                x1 = x0 + cell*0.1
                z1 = z0 + cell*0.1
                corners = ((x0, z0), (x1, z0), (x1, z1), (x0, z1))

                first = len(vertices)
                # Roof ring first, then the matching ring on the ground.
                vertices.extend([float(cx), float(roof_y), float(cz)] for cx, cz in corners)
                vertices.extend([float(cx), 0.0, float(cz)] for cx, cz in corners)

                if row < pixel_crop.shape[0] and col < pixel_crop.shape[1]:
                    rgb = pixel_crop[row, col] / 255.0
                    top_color = [float(rgb[0]), float(rgb[1]), float(rgb[2])]
                else:
                    top_color = [0.5, 0.5, 0.5]
                # Ground-ring vertices get a darkened copy of the roof color.
                side_color = [channel * 0.7 for channel in top_color]
                colors.extend([top_color] * 4 + [side_color] * 4)

                faces.extend([
                    [first, first+1, first+2],
                    [first, first+2, first+3]
                ])

        if not vertices:
            continue

        records.append({
            'id': int(seg_index),
            'category': str(seg['category']),
            'label': str(seg['label']),
            'semantic_id': int(seg['semantic_id']),
            'vertices': vertices,
            'faces': faces,
            'colors': colors,
            'center': [
                float((left + box_w/2 - img_w/2) * 0.1),
                float(seg['height'] * 0.5),
                float((top + box_h/2 - img_h/2) * 0.1)
            ],
            'bbox': [int(left), int(top), int(box_w), int(box_h)],
            'area': float(seg['area']),
            'height': float(seg['height'])
        })

    return records
def process_image(image, max_size, tile_size, tile_overlap, min_area, mesh_res,
                  apply_morphology, morph_kernel, detect_bound, bound_thickness,
                  apply_smoothing, smooth_iter, use_tiling):
    """Run the whole pipeline for one image: segment, vectorise, mesh, package.

    Args:
        image: Image array, or a path string that is read with OpenCV.
        max_size: Longest allowed image side in pixels; larger images are
            scaled down before segmentation.
        tile_size: Tile edge length passed to segment_with_tiling().
        tile_overlap: Tile overlap passed to segment_with_tiling().
        min_area: Connected components with fewer pixels are dropped.
        mesh_res: Grid step passed to create_3d_mesh().
        apply_morphology: Whether each category mask is opened and then closed.
        morph_kernel: Edge length of the square morphology structuring element.
        detect_bound: Whether class boundaries are replaced by the 'other' id.
        bound_thickness: Boundary thickness passed to detect_boundaries().
        apply_smoothing: Whether the id map is median-filtered.
        smooth_iter: Number of 3x3 median-filter passes.
        use_tiling: Whether segmentation may run tile by tile.

    Returns:
        (colored_seg, overlay, stats, zip_path): the color-coded segmentation,
        a 50/50 blend of it with the (resized) input, a statistics text, and
        the path of a ZIP holding 'city_3d_model.json' and
        'segmentation_result.png'.

    Raises:
        gr.Error: If no image was supplied or the image path cannot be read.
        RuntimeError: If the segmentation image cannot be PNG-encoded.
    """
    # Fail early with a readable message instead of an unpacking error on
    # np.array(None).shape.
    if image is None:
        raise gr.Error("画像をアップロードしてください")

    # Slider values may arrive as floats; range() and np.ones() need ints.
    max_size = int(max_size)
    tile_size = int(tile_size)
    tile_overlap = int(tile_overlap)
    mesh_res = int(mesh_res)
    morph_kernel = int(morph_kernel)
    bound_thickness = int(bound_thickness)
    smooth_iter = int(smooth_iter)

    # Load the model
    processor, model = load_model()

    # Image preprocessing
    resized_image = _resize_to_max(_load_rgb_image(image), max_size)

    # Segmentation
    predicted_seg = segment_with_tiling(resized_image, processor, model, tile_size, tile_overlap, use_tiling)

    # Class mapping
    city_segmentation = _to_city_classes(predicted_seg, model.config.id2label)

    # Class smoothing
    if apply_smoothing:
        for _ in range(smooth_iter):
            city_segmentation = median_filter(city_segmentation, size=3)

    # Boundary detection
    if detect_bound:
        city_segmentation, _ = detect_boundaries(city_segmentation, bound_thickness)

    # Segment extraction
    segments_data = _extract_segments(city_segmentation, min_area, apply_morphology, morph_kernel)

    # 3D mesh generation
    meshes = create_3d_mesh(segments_data, resized_image, mesh_res)

    # Metadata
    metadata = {
        'version': '2.1',
        'total_segments': len(meshes),
        'categories': {}
    }
    for mesh in meshes:
        entry = metadata['categories'].setdefault(
            mesh['category'], {'label': mesh['label'], 'count': 0}
        )
        entry['count'] += 1

    # Visualisation
    colored_seg = create_colored_segmentation(city_segmentation)
    overlay = (resized_image.astype(np.float32) * 0.5 + colored_seg.astype(np.float32) * 0.5).astype(np.uint8)

    # JSON + ZIP output
    output_data = {'metadata': metadata, 'meshes': meshes}
    json_str = json.dumps(output_data, ensure_ascii=False, indent=2)
    zip_path = _write_output_zip(json_str, colored_seg)

    # Statistics
    stats = f"総セグメント数: {len(meshes)}\n\n" + "".join(
        f"{info['label']}: {info['count']}個\n"
        for info in metadata['categories'].values()
    )

    return colored_seg, overlay, stats, zip_path


def _load_rgb_image(image):
    """Return the input as an array; path strings are read with OpenCV and converted BGR -> RGB."""
    if not isinstance(image, str):
        return np.array(image)
    bgr = cv2.imread(image)
    if bgr is None:
        # cv2.imread signals an unreadable file by returning None.
        raise gr.Error(f"画像を読み込めませんでした: {image}")
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def _resize_to_max(image, max_size):
    """Scale the image down so its longest side is at most max_size; otherwise return a copy."""
    height, width = image.shape[:2]
    longest = max(height, width)
    if longest <= max_size:
        return image.copy()
    scale_factor = max_size / longest
    # Keep at least one pixel per side for extreme aspect ratios.
    new_width = max(int(width * scale_factor), 1)
    new_height = max(int(height * scale_factor), 1)
    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)


def _to_city_classes(predicted_seg, id2label):
    """Convert a map of model class ids into a uint8 map of CITY_CATEGORIES semantic ids."""
    city_segmentation = np.zeros(predicted_seg.shape, dtype=np.uint8)
    for class_id in np.unique(predicted_seg):
        city_category = map_ade20k_to_city(class_id, id2label)
        city_segmentation[predicted_seg == class_id] = CITY_CATEGORIES[city_category]['semantic_id']
    return city_segmentation


def _extract_segments(city_segmentation, min_area, apply_morphology, morph_kernel):
    """Split every category mask into connected components of at least min_area pixels."""
    kernel = np.ones((morph_kernel, morph_kernel), dtype=bool) if apply_morphology else None
    segments = []
    for cat_name, cat_info in CITY_CATEGORIES.items():
        semantic_id = cat_info['semantic_id']
        mask = city_segmentation == semantic_id
        if not mask.any():
            continue
        if kernel is not None:
            mask = binary_opening(mask, structure=kernel)
            mask = binary_closing(mask, structure=kernel)

        labeled, num_features = ndimage.label(mask)
        if num_features == 0:
            continue
        # One pass gives every component's pixel count and bounding slices,
        # so components that are too small never need a full-image mask.
        areas = np.bincount(labeled.ravel(), minlength=num_features + 1)
        boxes = ndimage.find_objects(labeled)
        for label_id, box in enumerate(boxes, start=1):
            area = int(areas[label_id])
            if box is None or area < min_area:
                continue
            row_span, col_span = box
            segments.append({
                'id': len(segments),
                'category': cat_name,
                'label': cat_info['label'],
                'semantic_id': semantic_id,
                'color': cat_info['color'],
                'height': cat_info['height'],
                'segmentation': labeled == label_id,
                # Slice stops are exclusive, so stop - start is the true extent
                # (max - min would drop the last row and column).
                'bbox': [
                    int(col_span.start),
                    int(row_span.start),
                    int(col_span.stop - col_span.start),
                    int(row_span.stop - row_span.start),
                ],
                'area': area
            })
    return segments


def _write_output_zip(json_str, colored_seg):
    """Write the JSON model and the segmentation PNG into a ZIP in a fresh temp dir; return its path."""
    ok, buffer = cv2.imencode('.png', cv2.cvtColor(colored_seg, cv2.COLOR_RGB2BGR))
    if not ok:
        raise RuntimeError("failed to encode the segmentation image as PNG")
    # The directory is deliberately not removed here: the returned path is
    # handed to the download component and must still exist after returning.
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, 'city_3d_output.zip')
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        zip_file.writestr('city_3d_model.json', json_str)
        zip_file.writestr('segmentation_result.png', buffer.tobytes())
    return zip_path
# Gradio UI
# NOTE(review): the source's indentation was lost, so the nesting below
# (input column with settings accordion and button, then output column) is
# reconstructed — confirm it matches the intended layout.
with gr.Blocks(title="3D City Map Generator") as demo:
    gr.Markdown("# 🏙️ 3D City Map Generator")
    gr.Markdown("航空写真から3D都市マップを生成します(Segformer B5モデル使用)")
    with gr.Row():
        # Input side: image upload, tunable parameters, run button.
        with gr.Column():
            input_image = gr.Image(label="航空写真をアップロード", type="numpy")
            with gr.Accordion("⚙️ 詳細設定", open=False):
                max_size = gr.Slider(640, 2048, value=800, step=64, label="最大画像サイズ")
                use_tiling = gr.Checkbox(value=True, label="タイル分割処理を使用")
                tile_size = gr.Slider(120, 640, value=320, step=40, label="タイルサイズ")
                tile_overlap = gr.Slider(32, 128, value=64, step=8, label="タイル重複")
                min_area = gr.Slider(20, 200, value=32, step=4, label="最小セグメント面積")
                mesh_res = gr.Slider(1, 4, value=3, step=1, label="メッシュ解像度")
                apply_morphology = gr.Checkbox(value=True, label="モルフォロジー処理")
                morph_kernel = gr.Slider(3, 9, value=7, step=2, label="モルフォロジーカーネル")
                detect_bound = gr.Checkbox(value=True, label="境界検出")
                bound_thickness = gr.Slider(1, 5, value=5, step=1, label="境界の太さ")
                apply_smoothing = gr.Checkbox(value=True, label="クラス平滑化")
                smooth_iter = gr.Slider(1, 3, value=2, step=1, label="平滑化反復回数")
            process_btn = gr.Button("🚀 3Dマップ生成", variant="primary")
        # Output side: the four values returned by process_image().
        with gr.Column():
            seg_output = gr.Image(label="セグメンテーション結果")
            overlay_output = gr.Image(label="オーバーレイ")
            stats_output = gr.Textbox(label="統計情報", lines=10)
            download_output = gr.File(label="📥 3Dモデルをダウンロード (ZIP)")
    # The inputs list must follow process_image()'s positional parameter order.
    process_btn.click(
        fn=process_image,
        inputs=[input_image, max_size, tile_size, tile_overlap, min_area, mesh_res,
                apply_morphology, morph_kernel, detect_bound, bound_thickness,
                apply_smoothing, smooth_iter, use_tiling],
        outputs=[seg_output, overlay_output, stats_output, download_output]
    )
    gr.Markdown("""
### 使い方
1. 航空写真をアップロード
2. 必要に応じてパラメータを調整
3. 「3Dマップ生成」をクリック
4. ZIPファイルをダウンロードして、JSONファイルをBlenderなどで使用
### パラメータ説明
- **最大画像サイズ**: 大きいほど精度向上(処理時間増加)
- **タイル分割**: 大きな画像の精度向上に重要
- **最小セグメント面積**: 増やすとノイズ削減
- **メッシュ解像度**: 増やすとファイルサイズ減少
- **境界検出**: 建物と道路の混合を防ぐ
""")
# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()