oggata committed on
Commit
9150cca
·
verified ·
1 Parent(s): 543ca9a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +421 -0
app.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import io
import json
import os
import tempfile
import zipfile

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
from scipy import ndimage
from scipy.ndimage import binary_opening, binary_closing, sobel, binary_dilation, median_filter
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
13
+
14
# Global state: the SegFormer processor/model pair is loaded lazily by
# load_model() and cached here so repeated requests reuse it.
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = None
model = None

# ADE20K class name -> city category key. Matching is by substring against
# the lowercased ADE20K label (see map_ade20k_to_city()).
ADE20K_TO_CITY_MAPPING = {
    'road': 'road', 'street': 'road', 'path': 'road', 'sidewalk': 'road',
    'building': 'building_c', 'house': 'building_a', 'skyscraper': 'building_e',
    'highrise': 'building_e', 'tower': 'building_e',
    'office': 'building_d', 'shop': 'building_b', 'store': 'building_b',
    'apartment': 'building_c', 'hotel': 'building_d',
    'tree': 'forest', 'plant': 'forest', 'palm': 'forest',
    'grass': 'park', 'field': 'park', 'flower': 'park',
    'water': 'water', 'sea': 'water', 'river': 'water', 'lake': 'water',
    'earth': 'bare_land', 'sand': 'bare_land', 'ground': 'bare_land',
    'parking lot': 'infrastructure', 'stadium': 'building_d',
}

# City categories used throughout the app:
#   label       - user-facing display text (Japanese, shown in the UI/stats)
#   color       - RGB triple for the colored segmentation image
#   height      - extrusion height for the 3D mesh (halved when meshing)
#   semantic_id - id written into the remapped segmentation map
CITY_CATEGORIES = {
    'road': {'label': '道路', 'color': (128, 64, 128), 'height': 0, 'semantic_id': 0},
    'forest': {'label': '森林', 'color': (34, 139, 34), 'height': 1.5, 'semantic_id': 1},
    'park': {'label': '公園/緑地', 'color': (144, 238, 144), 'height': 0.5, 'semantic_id': 2},
    'water': {'label': '水域', 'color': (30, 144, 255), 'height': 0, 'semantic_id': 3},
    'building_a': {'label': '建物A(小)', 'color': (255, 200, 150), 'height': 0.6, 'semantic_id': 4},
    'building_b': {'label': '建物B(中小)', 'color': (255, 160, 122), 'height': 1.0, 'semantic_id': 5},
    'building_c': {'label': '建物C(中)', 'color': (240, 120, 90), 'height': 1.5, 'semantic_id': 6},
    'building_d': {'label': '建物D(中大)', 'color': (220, 80, 60), 'height': 2.2, 'semantic_id': 7},
    'building_e': {'label': '建物E(大)', 'color': (200, 40, 40), 'height': 3.0, 'semantic_id': 8},
    'bare_land': {'label': '空き地', 'color': (210, 180, 140), 'height': 0.1, 'semantic_id': 9},
    'infrastructure': {'label': 'インフラ', 'color': (100, 100, 100), 'height': 0.8, 'semantic_id': 10},
    'other': {'label': 'その他/境界', 'color': (80, 80, 80), 'height': 0, 'semantic_id': 11}
}
47
+
48
def load_model():
    """Return the cached SegFormer processor/model pair, loading it on first use.

    The objects are stored in module globals so every subsequent call is a
    cheap cache hit; the model is moved to the module-level `device`.
    """
    global processor, model
    if processor is not None and model is not None:
        return processor, model
    checkpoint = "nvidia/segformer-b5-finetuned-ade-640-640"
    processor = SegformerImageProcessor.from_pretrained(checkpoint)
    model = SegformerForSemanticSegmentation.from_pretrained(checkpoint).to(device)
    return processor, model
54
+
55
def map_ade20k_to_city(class_id, id2label):
    """Translate an ADE20K class id into one of the CITY_CATEGORIES keys.

    Resolution order: unknown ids map straight to 'other'; otherwise the
    lowercased ADE20K label is substring-matched first against the explicit
    ADE20K_TO_CITY_MAPPING table, then against keyword fallbacks; anything
    still unmatched becomes 'other'.
    """
    if class_id not in id2label:
        return 'other'
    class_name = id2label[class_id].lower()

    for ade_name, city_cat in ADE20K_TO_CITY_MAPPING.items():
        if ade_name in class_name:
            return city_cat

    # Keyword fallbacks; the order matters — earlier entries win, so the
    # largest building classes are tried first.
    fallbacks = (
        (('skyscraper', 'highrise', 'tower'), 'building_e'),
        (('office', 'hotel', 'commercial', 'stadium'), 'building_d'),
        (('building', 'apartment'), 'building_c'),
        (('shop', 'store', 'market'), 'building_b'),
        (('house', 'home', 'shed', 'hut'), 'building_a'),
    )
    for keywords, category in fallbacks:
        if any(word in class_name for word in keywords):
            return category

    return 'other'
76
+
77
def segment_with_tiling(image, processor, model, tile_size=320, overlap=64, use_tiling=True):
    """Segment an RGB image and return an (H, W) map of ADE20K class ids.

    Small images (or use_tiling=False) go through the model in one pass.
    Larger images are split into overlapping square tiles; the per-tile
    softmax probabilities are averaged wherever tiles overlap before the
    final argmax, which suppresses seam artifacts at tile borders.

    Args:
        image: HxWx3 uint8 RGB ndarray.
        processor, model: SegFormer pair from load_model().
        tile_size: square tile edge length in pixels.
        overlap: overlap between adjacent tiles in pixels (must be < tile_size).
        use_tiling: when False, always run a single full-image pass.

    Returns:
        int ndarray of shape (H, W) with a class id per pixel.
    """
    h, w = image.shape[:2]

    if not use_tiling or (h <= tile_size and w <= tile_size):
        pil_image = Image.fromarray(image)
        inputs = processor(images=pil_image, return_tensors="pt").to(device)

        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits

        # The model emits a downscaled logit map; upsample back to image size.
        upsampled_logits = torch.nn.functional.interpolate(
            logits, size=image.shape[:2], mode="bilinear", align_corners=False
        )
        return upsampled_logits.argmax(dim=1)[0].cpu().numpy()

    stride = tile_size - overlap
    num_tiles_h = (h - overlap) // stride + (1 if (h - overlap) % stride > 0 else 0)
    num_tiles_w = (w - overlap) // stride + (1 if (w - overlap) % stride > 0 else 0)

    # Probability accumulator. Allocated lazily so the class count is taken
    # from the model's actual output instead of being hard-coded (the old
    # code fixed it at 150, which only worked for ADE20K checkpoints).
    votes = None
    counts = np.zeros((h, w), dtype=np.float32)

    for i in range(num_tiles_h):
        for j in range(num_tiles_w):
            y_start = i * stride
            x_start = j * stride
            y_end = min(y_start + tile_size, h)
            x_end = min(x_start + tile_size, w)

            tile = image[y_start:y_end, x_start:x_end]
            pil_tile = Image.fromarray(tile)
            inputs = processor(images=pil_tile, return_tensors="pt").to(device)

            with torch.no_grad():
                outputs = model(**inputs)
                logits = outputs.logits

            upsampled = torch.nn.functional.interpolate(
                logits, size=tile.shape[:2], mode="bilinear", align_corners=False
            )
            probs = torch.nn.functional.softmax(upsampled, dim=1)[0].cpu().numpy()

            if votes is None:
                votes = np.zeros((h, w, probs.shape[0]), dtype=np.float32)

            votes[y_start:y_end, x_start:x_end] += probs.transpose(1, 2, 0)
            counts[y_start:y_end, x_start:x_end] += 1

    # Guard against division by zero for any pixel no tile happened to cover.
    counts = np.maximum(counts, 1)
    final_votes = votes / counts[:, :, np.newaxis]
    return final_votes.argmax(axis=2)
126
+
127
def create_colored_segmentation(seg_map):
    """Render a semantic-id map as an RGB image using CITY_CATEGORIES colors.

    Pixels whose id matches no category stay black.
    """
    rgb = np.zeros((*seg_map.shape, 3), dtype=np.uint8)
    for info in CITY_CATEGORIES.values():
        rgb[seg_map == info['semantic_id']] = info['color']
    return rgb
134
+
135
def detect_boundaries(segmentation_map, thickness=5):
    """Stamp class-transition pixels into the map as the 'other' category.

    Transitions are found with a Sobel gradient along each axis (any nonzero
    gradient marks a boundary), optionally thickened by a square dilation.

    NOTE(review): mutates `segmentation_map` in place, matching existing
    caller usage; the (mutated) map and the boolean boundary mask are returned.
    """
    as_float = segmentation_map.astype(float)
    boundaries = (np.abs(sobel(as_float, axis=0)) > 0) | (np.abs(sobel(as_float, axis=1)) > 0)

    if thickness > 1:
        structure = np.ones((thickness, thickness), dtype=bool)
        boundaries = binary_dilation(boundaries, structure=structure)

    segmentation_map[boundaries] = CITY_CATEGORIES['other']['semantic_id']
    return segmentation_map, boundaries
147
+
148
def create_3d_mesh(segments, image, resolution=2):
    """Convert segment masks into simple extruded quad meshes.

    Each segment's mask is sampled on a grid of `resolution`-pixel cells.
    Every covered cell contributes a flat roof quad (two triangles) at half
    the category height, plus four matching ground-level vertices; colors
    come from the underlying image pixel, with walls a darkened (0.7x) copy
    of the roof color. Pixel coordinates map to world units at 0.1/pixel,
    centered on the image.

    Args:
        segments: list of dicts with 'segmentation' (bool mask), 'bbox'
            ([x, y, w, h]), 'height', 'category', 'label', 'semantic_id',
            and 'area' keys (as built by process_image).
        image: HxWx3 uint8 RGB ndarray the masks refer to.
        resolution: grid cell size in pixels (larger -> coarser, smaller file).

    Returns:
        List of JSON-serializable mesh dicts (vertices/faces/colors plus
        metadata), one per non-degenerate segment.
    """
    img_h, img_w = image.shape[:2]
    meshes_data = []
    cell = resolution
    cell_world = cell * 0.1  # world-space edge length of one grid cell

    for idx, segment in enumerate(segments):
        x, y, w, h = segment['bbox']
        if w < 3 or h < 3:
            continue  # degenerate bounding box: nothing worth meshing

        mask_crop = segment['segmentation'][y:y + h, x:x + w]
        image_crop = image[y:y + h, x:x + w]
        if not mask_crop.any():
            continue

        roof_y = segment['height'] * 0.5
        vertices, faces, colors = [], [], []

        for sy in range(0, mask_crop.shape[0] - cell, cell):
            for sx in range(0, mask_crop.shape[1] - cell, cell):
                if not mask_crop[sy, sx]:
                    continue

                wx = (x + sx - img_w / 2) * 0.1
                wz = (y + sy - img_h / 2) * 0.1
                base = len(vertices)

                # Roof quad corners at the extrusion height.
                vertices += [
                    [float(wx), float(roof_y), float(wz)],
                    [float(wx + cell_world), float(roof_y), float(wz)],
                    [float(wx + cell_world), float(roof_y), float(wz + cell_world)],
                    [float(wx), float(roof_y), float(wz + cell_world)],
                ]
                # Matching ground-level corners.
                vertices += [
                    [float(wx), 0.0, float(wz)],
                    [float(wx + cell_world), 0.0, float(wz)],
                    [float(wx + cell_world), 0.0, float(wz + cell_world)],
                    [float(wx), 0.0, float(wz + cell_world)],
                ]

                # Sample the image under the cell; fall back to gray when the
                # sample point lies outside the cropped image.
                if sy < image_crop.shape[0] and sx < image_crop.shape[1]:
                    px = image_crop[sy, sx] / 255.0
                    roof_color = [float(px[0]), float(px[1]), float(px[2])]
                else:
                    roof_color = [0.5, 0.5, 0.5]
                wall_color = [c * 0.7 for c in roof_color]
                colors += [roof_color] * 4 + [wall_color] * 4

                # Only the roof quad is triangulated.
                faces += [
                    [base, base + 1, base + 2],
                    [base, base + 2, base + 3],
                ]

        if vertices:
            meshes_data.append({
                'id': int(idx),
                'category': str(segment['category']),
                'label': str(segment['label']),
                'semantic_id': int(segment['semantic_id']),
                'vertices': vertices,
                'faces': faces,
                'colors': colors,
                'center': [
                    float((x + w / 2 - img_w / 2) * 0.1),
                    float(segment['height'] * 0.5),
                    float((y + h / 2 - img_h / 2) * 0.1),
                ],
                'bbox': [int(x), int(y), int(w), int(h)],
                'area': float(segment['area']),
                'height': float(segment['height']),
            })

    return meshes_data
229
+
230
def process_image(image, max_size, tile_size, tile_overlap, min_area, mesh_res,
                  apply_morphology, morph_kernel, detect_bound, bound_thickness,
                  apply_smoothing, smooth_iter, use_tiling):
    """Run the full pipeline: aerial photo -> segmentation -> 3D city ZIP.

    Steps: load model, resize input, segment (optionally tiled), remap
    ADE20K ids to city categories, optional smoothing/boundary detection,
    connected-component extraction, mesh generation, and ZIP packaging.

    Returns:
        (colored segmentation RGB ndarray, overlay RGB ndarray,
         stats text, path to the generated ZIP file).
    """
    # Lazy model load (cached in module globals).
    processor, model = load_model()

    # --- Image preprocessing -------------------------------------------------
    if isinstance(image, str):
        original_image = cv2.imread(image)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    else:
        original_image = np.array(image)

    original_height, original_width = original_image.shape[:2]

    # Downscale so the longest edge is at most max_size (keeps inference fast).
    if max(original_height, original_width) > max_size:
        scale_factor = max_size / max(original_height, original_width)
        new_width = int(original_width * scale_factor)
        new_height = int(original_height * scale_factor)
        resized_image = cv2.resize(original_image, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
    else:
        resized_image = original_image.copy()

    # --- Segmentation --------------------------------------------------------
    predicted_seg = segment_with_tiling(resized_image, processor, model, tile_size, tile_overlap, use_tiling)

    # Remap ADE20K class ids to the app's city category ids.
    city_segmentation = np.zeros(predicted_seg.shape, dtype=np.uint8)
    id2label = model.config.id2label

    for class_id in np.unique(predicted_seg):
        city_category = map_ade20k_to_city(class_id, id2label)
        semantic_id = CITY_CATEGORIES[city_category]['semantic_id']
        city_segmentation[predicted_seg == class_id] = semantic_id

    # Optional median-filter smoothing to remove class speckle noise.
    if apply_smoothing:
        for _ in range(smooth_iter):
            city_segmentation = median_filter(city_segmentation, size=3)

    # Optional boundary detection: class transitions become 'other'.
    if detect_bound:
        city_segmentation, _ = detect_boundaries(city_segmentation, bound_thickness)

    # --- Segment extraction --------------------------------------------------
    segments_data = []
    segment_id = 0

    for cat_name, cat_info in CITY_CATEGORIES.items():
        semantic_id = cat_info['semantic_id']
        mask = city_segmentation == semantic_id
        if not mask.any():
            continue

        if apply_morphology:
            # Opening removes small islands; closing fills small holes.
            kernel = np.ones((morph_kernel, morph_kernel), dtype=bool)
            mask = binary_opening(mask, structure=kernel)
            mask = binary_closing(mask, structure=kernel)

        labeled, num_features = ndimage.label(mask)

        for i in range(1, num_features + 1):
            segment_mask = labeled == i
            area = np.sum(segment_mask)
            if area < min_area:
                continue  # drop noise-sized components

            rows, cols = np.where(segment_mask)
            if len(rows) == 0:
                continue

            y_min, y_max = rows.min(), rows.max()
            x_min, x_max = cols.min(), cols.max()

            segments_data.append({
                'id': segment_id,
                'category': cat_name,
                'label': cat_info['label'],
                'semantic_id': semantic_id,
                'color': cat_info['color'],
                'height': cat_info['height'],
                'segmentation': segment_mask,
                'bbox': [int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)],
                'area': int(area)
            })
            segment_id += 1

    # --- 3D mesh generation --------------------------------------------------
    meshes = create_3d_mesh(segments_data, resized_image, mesh_res)

    # Metadata summary: per-category mesh counts.
    metadata = {
        'version': '2.1',
        'total_segments': len(meshes),
        'categories': {}
    }
    for mesh in meshes:
        cat = mesh['category']
        if cat not in metadata['categories']:
            metadata['categories'][cat] = {'label': mesh['label'], 'count': 0}
        metadata['categories'][cat]['count'] += 1

    # --- Visualization -------------------------------------------------------
    colored_seg = create_colored_segmentation(city_segmentation)
    overlay = (resized_image.astype(np.float32) * 0.5 + colored_seg.astype(np.float32) * 0.5).astype(np.uint8)

    # --- Output packaging ----------------------------------------------------
    output_data = {'metadata': metadata, 'meshes': meshes}
    json_str = json.dumps(output_data, ensure_ascii=False, indent=2)

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        zip_file.writestr('city_3d_model.json', json_str)
        # Segmentation image (cv2 expects BGR for PNG encoding).
        _, buffer = cv2.imencode('.png', cv2.cvtColor(colored_seg, cv2.COLOR_RGB2BGR))
        zip_file.writestr('segmentation_result.png', buffer.tobytes())
    zip_buffer.seek(0)

    # BUG FIX: gr.File expects a filepath (or file object), not raw bytes —
    # the old code returned zip_buffer.getvalue(), which the download
    # component cannot serve. Persist the archive to a temporary file and
    # return its path instead.
    with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False,
                                     prefix='city_3d_model_') as tmp:
        tmp.write(zip_buffer.getvalue())
        zip_path = tmp.name

    # Stats text shown in the UI.
    stats = f"総セグメント数: {len(meshes)}\n\n"
    for cat, info in metadata['categories'].items():
        stats += f"{info['label']}: {info['count']}個\n"

    return colored_seg, overlay, stats, zip_path
362
+
363
+ # Gradio UI
364
+ with gr.Blocks(title="3D City Map Generator") as demo:
365
+ gr.Markdown("# 🏙️ 3D City Map Generator")
366
+ gr.Markdown("航空写真から3D都市マップを生成します(Segformer B5モデル使用)")
367
+
368
+ with gr.Row():
369
+ with gr.Column():
370
+ input_image = gr.Image(label="航空写真をアップロード", type="numpy")
371
+
372
+ with gr.Accordion("⚙️ 詳細設定", open=False):
373
+ max_size = gr.Slider(640, 2048, value=800, step=64, label="最大画像サイズ")
374
+ use_tiling = gr.Checkbox(value=True, label="タイル分割処理を使用")
375
+ tile_size = gr.Slider(120, 640, value=320, step=40, label="タイルサイズ")
376
+ tile_overlap = gr.Slider(32, 128, value=64, step=8, label="タイル重複")
377
+ min_area = gr.Slider(20, 200, value=32, step=4, label="最小セグメント面積")
378
+ mesh_res = gr.Slider(1, 4, value=3, step=1, label="メッシュ解像度")
379
+
380
+ apply_morphology = gr.Checkbox(value=True, label="モルフォロジー処理")
381
+ morph_kernel = gr.Slider(3, 9, value=7, step=2, label="モルフォロジーカーネル")
382
+
383
+ detect_bound = gr.Checkbox(value=True, label="境界検出")
384
+ bound_thickness = gr.Slider(1, 5, value=5, step=1, label="境界の太さ")
385
+
386
+ apply_smoothing = gr.Checkbox(value=True, label="クラス平滑化")
387
+ smooth_iter = gr.Slider(1, 3, value=2, step=1, label="平滑化反復回数")
388
+
389
+ process_btn = gr.Button("🚀 3Dマップ生成", variant="primary")
390
+
391
+ with gr.Column():
392
+ seg_output = gr.Image(label="セグメンテーション結果")
393
+ overlay_output = gr.Image(label="オーバーレイ")
394
+ stats_output = gr.Textbox(label="統計情報", lines=10)
395
+ download_output = gr.File(label="📥 3Dモデルをダウンロード (ZIP)")
396
+
397
+ process_btn.click(
398
+ fn=process_image,
399
+ inputs=[input_image, max_size, tile_size, tile_overlap, min_area, mesh_res,
400
+ apply_morphology, morph_kernel, detect_bound, bound_thickness,
401
+ apply_smoothing, smooth_iter, use_tiling],
402
+ outputs=[seg_output, overlay_output, stats_output, download_output]
403
+ )
404
+
405
+ gr.Markdown("""
406
+ ### 使い方
407
+ 1. 航空写真をアップロード
408
+ 2. 必要に応じてパラメータを調整
409
+ 3. 「3Dマップ生成」をクリック
410
+ 4. ZIPファイルをダウンロードして、JSONファイルをBlenderなどで使用
411
+
412
+ ### パラメータ説明
413
+ - **最大画像サイズ**: 大きいほど精度向上(処理時間増加)
414
+ - **タイル分割**: 大きな画像の精度向上に重要
415
+ - **最小セグメント面積**: 増やすとノイズ削減
416
+ - **メッシュ解像度**: 増やすとファイルサイズ減少
417
+ - **境界検出**: 建物と道路の混合を防ぐ
418
+ """)
419
+
420
+ if __name__ == "__main__":
421
+ demo.launch()