Spaces:

ColamanAI
/

Colaman-segmap

Running

App Files Files Community

ColamanAI committed on Oct 14, 2025

Commit

c5c9c70

verified ·

1 Parent(s): 0efdb24

Upload app.py

Browse files

Files changed (1) hide show

app.py +159 -57

app.py CHANGED Viewed

@@ -682,6 +682,7 @@ def run_model(
     filter_white_bg=False,
     enable_segmentation=False,
     text_prompt=DEFAULT_TEXT_PROMPT,
 ):
     """
     Run the MapAnything model + optional segmentation
@@ -689,12 +690,14 @@ def run_model(
     global model
     import torch
     print(f"Processing images from {target_dir}")
     device = "cuda" if torch.cuda.is_available() else "cpu"
     device = torch.device(device)
     # Initialize MapAnything model
     if model is None:
         model = initialize_mapanything_model(high_level_config, device)
     else:
@@ -704,10 +707,12 @@ def run_model(
     # Load segmentation models if enabled
     if enable_segmentation:
         load_grounding_dino_model(device)
         load_sam_model(device)
     # Load images
     print("Loading images...")
     image_folder_path = os.path.join(target_dir, "images")
     views = load_images(image_folder_path)
@@ -717,12 +722,14 @@ def run_model(
         raise ValueError("No images found. Check your upload.")
     # Run model inference
     print("Running inference...")
     outputs = model.infer(
         views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False
     )
     # Convert predictions
     predictions = {}
     extrinsic_list = []
     intrinsic_list = []
@@ -768,6 +775,7 @@ def run_model(
     predictions["final_mask"] = np.stack(final_mask_list, axis=0)
     # Process visualization data
     processed_data = process_predictions_for_visualization(
         predictions, views, high_level_config, filter_black_bg, filter_white_bg
     )
@@ -775,6 +783,7 @@ def run_model(
     # Segmentation processing
     segmented_glb = None
     if enable_segmentation and grounding_dino_model is not None:
         print("\n🎯 Starting segmentation...")
         print(f"🔍 Detection prompt: {text_prompt[:100]}...")
@@ -782,6 +791,8 @@ def run_model(
         all_view_masks = []
         for view_idx, ref_image in enumerate(images_list):
             print(f"\n📸 Processing view {view_idx + 1}/{len(images_list)}...")
             if ref_image.dtype != np.uint8:
@@ -810,17 +821,21 @@ def run_model(
         # Match objects across views
         if any(len(dets) > 0 for dets in all_view_detections):
             object_id_map, unique_objects = match_objects_across_views(all_view_detections)
             # Generate segmented mesh
             segmented_glb = create_multi_view_segmented_mesh(
                 processed_data, all_view_detections, all_view_masks,
                 object_id_map, unique_objects, target_dir
             )
     # Cleanup
     torch.cuda.empty_cache()
     return predictions, processed_data, segmented_glb
@@ -1079,17 +1094,17 @@ def gradio_demo(
     show_cam=True,
     filter_black_bg=False,
     filter_white_bg=False,
-    conf_thres=3.0,
     apply_mask=True,
     show_mesh=True,
     enable_segmentation=False,
     text_prompt=DEFAULT_TEXT_PROMPT,
-    use_sam=True,
 ):
     """执行重建"""
     if not os.path.isdir(target_dir) or target_dir == "None":
         return None, None, "❌ 未找到有效的目标目录，请先上传文件", None, None, None, None, None, None, None, None, None
     start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
@@ -1103,14 +1118,16 @@ def gradio_demo(
     all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
     frame_filter_choices = ["All"] + all_files
     print("运行 MapAnything 模型...")
     with torch.no_grad():
         predictions, processed_data, segmented_glb = run_model(
             target_dir, apply_mask, True, filter_black_bg, filter_white_bg,
-            enable_segmentation, text_prompt
         )
     # 保存预测结果
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
@@ -1118,6 +1135,7 @@ def gradio_demo(
         frame_filter = "All"
     # 生成 GLB 文件名
     glbfile = os.path.join(
         target_dir,
         f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}.glb",
@@ -1131,20 +1149,21 @@ def gradio_demo(
         mask_black_bg=filter_black_bg,
         mask_white_bg=filter_white_bg,
         as_mesh=show_mesh,
-        conf_percentile=conf_thres,
     )
     glbscene.export(file_obj=glbfile)
     # 清理内存
     del predictions
     gc.collect()
     torch.cuda.empty_cache()
     end_time = time.time()
     print(f"总耗时: {end_time - start_time:.2f}秒")
-    log_msg = f"✅ 重建成功 ({len(all_files)} 帧)"
     # Populate visualization tabs
     depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(
         processed_data
     )
@@ -1153,6 +1172,8 @@ def gradio_demo(
     depth_selector, normal_selector, measure_selector = update_view_selectors(
         processed_data
     )
     return (
         glbfile,
@@ -1428,7 +1449,6 @@ def update_visualization(
     frame_filter,
     show_cam,
     is_example,
-    conf_thres=None,
     filter_black_bg=False,
     filter_white_bg=False,
     show_mesh=True,
@@ -1459,7 +1479,6 @@ def update_visualization(
         mask_black_bg=filter_black_bg,
         mask_white_bg=filter_white_bg,
         as_mesh=show_mesh,
-        conf_percentile=conf_thres,
     )
     glbscene.export(file_obj=glbfile)
@@ -1578,7 +1597,7 @@ def load_example_scene(scene_name, examples_dir="examples"):
             break
     if selected_scene is None:
-        return None, None, None, None, "❌ 场景未找到"
     file_objects = []
     for image_path in selected_scene["image_files"]:
@@ -1587,7 +1606,6 @@ def load_example_scene(scene_name, examples_dir="examples"):
     target_dir, image_paths = handle_uploads(file_objects, 1.0)
     return (
-        None,
         None,
         target_dir,
         image_paths,
@@ -1656,35 +1674,45 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
         with gr.Column(scale=1, min_width=300):
             gr.Markdown("### 📤 输入")
-            with gr.Tabs():
-                with gr.Tab("📷 图片"):
-                    input_images = gr.File(
-                        file_count="multiple",
-                        label="上传多张图片（推荐3-10张）",
-                        interactive=True,
-                        height=200
-                    )
-                with gr.Tab("🎥 视频"):
-                    input_video = gr.Video(
-                        label="上传视频",
-                        interactive=True,
-                        height=300
-                    )
-                    s_time_interval = gr.Slider(
-                        minimum=0.1, maximum=5.0, value=1.0, step=0.1,
-                        label="帧采样间隔（秒）", interactive=True
-                    )
             image_gallery = gr.Gallery(
                 label="图片预览", columns=3, height=350,
                 show_download_button=True, object_fit="contain", preview=True
             )
             with gr.Row():
                 submit_btn = gr.Button("🚀 开始重建", variant="primary", scale=2)
                 clear_btn = gr.ClearButton(
-                    [input_video, input_images, target_dir_output, image_gallery],
                     value="🗑️ 清空", scale=1
                 )
@@ -1755,18 +1783,14 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
                 max_lines=1
             )
-    # 高级选项（可折叠）
-    with gr.Accordion("⚙️ 高级选项", open=False):
         with gr.Row(equal_height=False):
             with gr.Column(scale=1, min_width=300):
                 gr.Markdown("#### 可视化参数")
                 frame_filter = gr.Dropdown(
                     choices=["All"], value="All", label="显示帧"
                 )
-                conf_thres = gr.Slider(
-                    minimum=0, maximum=100, value=0, step=0.1,
-                    label="置信度阈值（百分位）"
-                )
                 show_cam = gr.Checkbox(label="显示相机", value=True)
                 show_mesh = gr.Checkbox(label="显示网格", value=True)
                 filter_black_bg = gr.Checkbox(label="过滤黑色背景", value=False)
@@ -1821,7 +1845,7 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
                                 scene_img.select(
                                     fn=lambda name=scene["name"]: load_example_scene(name),
                                     outputs=[
-                                        reconstruction_output, segmented_output,
                                         target_dir_output, image_gallery, log_output
                                     ]
                                 )
@@ -1839,32 +1863,110 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
     )
     # 上传文件自动更新
-    def update_gallery_on_unified_upload(files_video, files_images, interval):
-        if not files_video and not files_images:
-            return None, None, None, None
-        # Combine both inputs
-        all_files = []
-        if files_video:
-            all_files.append(files_video)
-        if files_images:
-            all_files.extend(files_images)
-        target_dir, image_paths = handle_uploads(all_files, interval)
         return (
-            None,
             target_dir,
             image_paths,
             "✅ 上传完成，点击「开始重建」进行 3D 处理",
         )
-    input_video.change(
         fn=update_gallery_on_unified_upload,
-        inputs=[input_video, input_images, s_time_interval],
-        outputs=[segmented_output, target_dir_output, image_gallery, log_output]
     )
-    input_images.change(
-        fn=update_gallery_on_unified_upload,
-        inputs=[input_video, input_images, s_time_interval],
-        outputs=[segmented_output, target_dir_output, image_gallery, log_output]
     )
     # 重建按钮
@@ -1878,7 +1980,7 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
         fn=gradio_demo,
         inputs=[
             target_dir_output, frame_filter, show_cam,
-            filter_black_bg, filter_white_bg, conf_thres,
             apply_mask_checkbox, show_mesh,
             enable_segmentation, text_prompt
         ],
@@ -1896,12 +1998,12 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
     clear_btn.add([reconstruction_output, segmented_output, log_output])
     # 可视化参数实时更新
-    for component in [frame_filter, show_cam, conf_thres, show_mesh]:
         component.change(
             fn=update_visualization,
             inputs=[
                 target_dir_output, frame_filter, show_cam, is_example,
-                conf_thres, filter_black_bg, filter_white_bg, show_mesh
             ],
             outputs=[reconstruction_output, log_output]
         )

     filter_white_bg=False,
     enable_segmentation=False,
     text_prompt=DEFAULT_TEXT_PROMPT,
+    progress=gr.Progress(),
 ):
     """
     Run the MapAnything model + optional segmentation
     global model
     import torch
+    progress(0, desc="🔧 初始化设备...")
     print(f"Processing images from {target_dir}")
     device = "cuda" if torch.cuda.is_available() else "cpu"
     device = torch.device(device)
     # Initialize MapAnything model
+    progress(0.05, desc="📥 加载 MapAnything 模型...")
     if model is None:
         model = initialize_mapanything_model(high_level_config, device)
     else:
     # Load segmentation models if enabled
     if enable_segmentation:
+        progress(0.1, desc="🎯 加载分割模型...")
         load_grounding_dino_model(device)
         load_sam_model(device)
     # Load images
+    progress(0.15, desc="📷 加载图片...")
     print("Loading images...")
     image_folder_path = os.path.join(target_dir, "images")
     views = load_images(image_folder_path)
         raise ValueError("No images found. Check your upload.")
     # Run model inference
+    progress(0.2, desc=f"🚀 运行 3D 重建 ({len(views)} 张图片)...")
     print("Running inference...")
     outputs = model.infer(
         views, apply_mask=apply_mask, mask_edges=True, memory_efficient_inference=False
     )
     # Convert predictions
+    progress(0.5, desc="🔄 处理预测结果...")
     predictions = {}
     extrinsic_list = []
     intrinsic_list = []
     predictions["final_mask"] = np.stack(final_mask_list, axis=0)
     # Process visualization data
+    progress(0.6, desc="🎨 准备可视化数据...")
     processed_data = process_predictions_for_visualization(
         predictions, views, high_level_config, filter_black_bg, filter_white_bg
     )
     # Segmentation processing
     segmented_glb = None
     if enable_segmentation and grounding_dino_model is not None:
+        progress(0.65, desc="🎯 开始物体分割...")
         print("\n🎯 Starting segmentation...")
         print(f"🔍 Detection prompt: {text_prompt[:100]}...")
         all_view_masks = []
         for view_idx, ref_image in enumerate(images_list):
+            progress(0.65 + (view_idx / len(images_list)) * 0.2,
+                    desc=f"🔍 检测视图 {view_idx + 1}/{len(images_list)}...")
             print(f"\n📸 Processing view {view_idx + 1}/{len(images_list)}...")
             if ref_image.dtype != np.uint8:
         # Match objects across views
         if any(len(dets) > 0 for dets in all_view_detections):
+            progress(0.85, desc="🔗 匹配跨视图物体...")
             object_id_map, unique_objects = match_objects_across_views(all_view_detections)
             # Generate segmented mesh
+            progress(0.9, desc="🏗️ 生成分割3D模型...")
             segmented_glb = create_multi_view_segmented_mesh(
                 processed_data, all_view_detections, all_view_masks,
                 object_id_map, unique_objects, target_dir
             )
     # Cleanup
+    progress(0.95, desc="🧹 清理内存...")
     torch.cuda.empty_cache()
+    progress(1.0, desc="✅ 完成！")
     return predictions, processed_data, segmented_glb
     show_cam=True,
     filter_black_bg=False,
     filter_white_bg=False,
     apply_mask=True,
     show_mesh=True,
     enable_segmentation=False,
     text_prompt=DEFAULT_TEXT_PROMPT,
+    progress=gr.Progress(),
 ):
     """执行重建"""
     if not os.path.isdir(target_dir) or target_dir == "None":
         return None, None, "❌ 未找到有效的目标目录，请先上传文件", None, None, None, None, None, None, None, None, None
+    progress(0, desc="🔄 准备重建...")
     start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
     all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
     frame_filter_choices = ["All"] + all_files
+    progress(0.05, desc="🚀 运行 MapAnything 模型...")
     print("运行 MapAnything 模型...")
     with torch.no_grad():
         predictions, processed_data, segmented_glb = run_model(
             target_dir, apply_mask, True, filter_black_bg, filter_white_bg,
+            enable_segmentation, text_prompt, progress
         )
     # 保存预测结果
+    progress(0.92, desc="💾 保存预测结果...")
     prediction_save_path = os.path.join(target_dir, "predictions.npz")
     np.savez(prediction_save_path, **predictions)
         frame_filter = "All"
     # 生成 GLB 文件名
+    progress(0.93, desc="🏗️ 生成原始3D模型...")
     glbfile = os.path.join(
         target_dir,
         f"glbscene_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}_mesh{show_mesh}.glb",
         mask_black_bg=filter_black_bg,
         mask_white_bg=filter_white_bg,
         as_mesh=show_mesh,
     )
     glbscene.export(file_obj=glbfile)
     # 清理内存
+    progress(0.96, desc="🧹 清理内存...")
     del predictions
     gc.collect()
     torch.cuda.empty_cache()
     end_time = time.time()
     print(f"总耗时: {end_time - start_time:.2f}秒")
+    log_msg = f"✅ 重建成功 ({len(all_files)} 帧，耗时 {end_time - start_time:.1f}秒)"
     # Populate visualization tabs
+    progress(0.98, desc="🎨 生成可视化...")
     depth_vis, normal_vis, measure_img, measure_pts = populate_visualization_tabs(
         processed_data
     )
     depth_selector, normal_selector, measure_selector = update_view_selectors(
         processed_data
     )
+    progress(1.0, desc="✅ 全部完成！")
     return (
         glbfile,
     frame_filter,
     show_cam,
     is_example,
     filter_black_bg=False,
     filter_white_bg=False,
     show_mesh=True,
         mask_black_bg=filter_black_bg,
         mask_white_bg=filter_white_bg,
         as_mesh=show_mesh,
     )
     glbscene.export(file_obj=glbfile)
             break
     if selected_scene is None:
+        return None, None, None, "❌ 场景未找到"
     file_objects = []
     for image_path in selected_scene["image_files"]:
     target_dir, image_paths = handle_uploads(file_objects, 1.0)
     return (
         None,
         target_dir,
         image_paths,
         with gr.Column(scale=1, min_width=300):
             gr.Markdown("### 📤 输入")
+            unified_upload = gr.File(
+                file_count="multiple",
+                label="上传视频或图片",
+                interactive=True,
+                file_types=["image", "video"],
+            )
+            with gr.Row():
+                s_time_interval = gr.Slider(
+                    minimum=0.1, maximum=5.0, value=1.0, step=0.1,
+                    label="视频采样时间间隔（每x秒取一帧）",
+                    interactive=True,
+                    visible=True,
+                    scale=3,
+                )
+                resample_btn = gr.Button(
+                    "重新采样视频",
+                    visible=False,
+                    variant="secondary",
+                    scale=1,
+                )
             image_gallery = gr.Gallery(
                 label="图片预览", columns=3, height=350,
                 show_download_button=True, object_fit="contain", preview=True
             )
+            clear_uploads_btn = gr.ClearButton(
+                [unified_upload, image_gallery],
+                value="清空上传",
+                variant="secondary",
+                size="sm",
+            )
             with gr.Row():
                 submit_btn = gr.Button("🚀 开始重建", variant="primary", scale=2)
                 clear_btn = gr.ClearButton(
+                    [unified_upload, target_dir_output, image_gallery],
                     value="🗑️ 清空", scale=1
                 )
                 max_lines=1
             )
+    # 高级选项（默认打开）
+    with gr.Accordion("⚙️ 高级选项", open=True):
         with gr.Row(equal_height=False):
             with gr.Column(scale=1, min_width=300):
                 gr.Markdown("#### 可视化参数")
                 frame_filter = gr.Dropdown(
                     choices=["All"], value="All", label="显示帧"
                 )
                 show_cam = gr.Checkbox(label="显示相机", value=True)
                 show_mesh = gr.Checkbox(label="显示网格", value=True)
                 filter_black_bg = gr.Checkbox(label="过滤黑色背景", value=False)
                                 scene_img.select(
                                     fn=lambda name=scene["name"]: load_example_scene(name),
                                     outputs=[
+                                        reconstruction_output,
                                         target_dir_output, image_gallery, log_output
                                     ]
                                 )
     )
     # 上传文件自动更新
+    def update_gallery_on_unified_upload(files, interval):
+        if not files:
+            return None, None, None
+        target_dir, image_paths = handle_uploads(files, interval)
         return (
             target_dir,
             image_paths,
             "✅ 上传完成，点击「开始重建」进行 3D 处理",
         )
+    def show_resample_button(files):
+        """仅当上传的文件包含视频时显示重新采样按钮"""
+        if not files:
+            return gr.update(visible=False)
+        # 检查是否有视频文件
+        video_extensions = [
+            ".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp",
+        ]
+        has_video = False
+        for file_data in files:
+            if isinstance(file_data, dict) and "name" in file_data:
+                file_path = file_data["name"]
+            else:
+                file_path = str(file_data)
+            file_ext = os.path.splitext(file_path)[1].lower()
+            if file_ext in video_extensions:
+                has_video = True
+                break
+        return gr.update(visible=has_video)
+    def resample_video_with_new_interval(files, new_interval, current_target_dir):
+        """使用新的滑块值重新采样视频"""
+        if not files:
+            return (
+                current_target_dir,
+                None,
+                "没有可重新采样的文件。",
+                gr.update(visible=False),
+            )
+        # 检查是否有视频需要重新采样
+        video_extensions = [
+            ".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp",
+        ]
+        has_video = any(
+            os.path.splitext(
+                str(file_data["name"] if isinstance(file_data, dict) else file_data)
+            )[1].lower()
+            in video_extensions
+            for file_data in files
+        )
+        if not has_video:
+            return (
+                current_target_dir,
+                None,
+                "未找到视频进行重新采样。",
+                gr.update(visible=False),
+            )
+        # 清理旧的目标目录
+        if (
+            current_target_dir
+            and current_target_dir != "None"
+            and os.path.exists(current_target_dir)
+        ):
+            shutil.rmtree(current_target_dir)
+        # 使用新间隔处理文件
+        target_dir, image_paths = handle_uploads(files, new_interval)
+        return (
+            target_dir,
+            image_paths,
+            f"视频已使用 {new_interval}秒 间隔重新采样。点击「开始重建」进行 3D 处理。",
+            gr.update(visible=False),
+        )
+    unified_upload.change(
         fn=update_gallery_on_unified_upload,
+        inputs=[unified_upload, s_time_interval],
+        outputs=[target_dir_output, image_gallery, log_output]
+    ).then(
+        fn=show_resample_button,
+        inputs=[unified_upload],
+        outputs=[resample_btn],
     )
+    # 滑块改变时显示重新采样按钮（仅当已上传文件时）
+    s_time_interval.change(
+        fn=show_resample_button,
+        inputs=[unified_upload],
+        outputs=[resample_btn],
+    )
+    # 处理重新采样按钮点击
+    resample_btn.click(
+        fn=resample_video_with_new_interval,
+        inputs=[unified_upload, s_time_interval, target_dir_output],
+        outputs=[target_dir_output, image_gallery, log_output, resample_btn],
     )
     # 重建按钮
         fn=gradio_demo,
         inputs=[
             target_dir_output, frame_filter, show_cam,
+            filter_black_bg, filter_white_bg,
             apply_mask_checkbox, show_mesh,
             enable_segmentation, text_prompt
         ],
     clear_btn.add([reconstruction_output, segmented_output, log_output])
     # 可视化参数实时更新
+    for component in [frame_filter, show_cam, show_mesh]:
         component.change(
             fn=update_visualization,
             inputs=[
                 target_dir_output, frame_filter, show_cam, is_example,
+                filter_black_bg, filter_white_bg, show_mesh
             ],
             outputs=[reconstruction_output, log_output]
         )