| | """ |
| | Advanced 3D Reconstruction from Single Images with Responsible AI Features |
| | |
| | """ |
| |
|
| | import gradio as gr |
| | import numpy as np |
| | import torch |
| | from PIL import Image |
| | from transformers import GLPNForDepthEstimation, GLPNImageProcessor |
| | import open3d as o3d |
| | import plotly.graph_objects as go |
| | import matplotlib.pyplot as plt |
| | import io |
| | import json |
| | import time |
| | from pathlib import Path |
| | import tempfile |
| | import zipfile |
| | import hashlib |
| | from datetime import datetime |
| |
|
| | |
| | |
| | |
| |
|


RESPONSIBLE_AI_NOTICE = """
## ⚠️ Responsible Use Guidelines

### Privacy & Consent
- **Do not upload images containing identifiable people without their explicit consent**
- **Do not use for surveillance, tracking, or monitoring individuals**
- Facial features may be reconstructed in 3D - consider privacy implications
- Remove metadata (EXIF) that may contain location or personal information

### Ethical Use
- This tool is for **educational, research, and creative purposes only**
- **Prohibited uses:**
  - Creating deepfakes or misleading 3D content
  - Unauthorized documentation of private property
  - Circumventing security systems
  - Generating 3D models for harassment or stalking
  - Commercial use without proper rights to source images

### Limitations & Bias
- Models trained primarily on indoor Western architecture
- May perform poorly on non-Western architectural styles
- Scale is relative, not absolute - not suitable for precision measurements
- Single viewpoint limitations - occluded areas are inferred, not captured

### Data Usage
- Images are processed locally during your session
- No images are stored or transmitted to external servers
- Processing logs contain only technical metrics, no image content
- You retain all rights to your uploaded images and generated 3D models

**By using this tool, you agree to these responsible use guidelines.**
"""


def check_image_safety(image):
    """Basic safety checks for uploaded images"""
    warnings = []

    width, height = image.size
    if width * height > 10_000_000:
        warnings.append("⚠️ Very large image - consider resizing to improve processing speed")

    aspect_ratio = max(width, height) / min(width, height)
    if aspect_ratio > 3:
        warnings.append("⚠️ Unusual aspect ratio detected - ensure image doesn't contain unintended content")

    try:
        exif = image.getexif()
        # EXIF tag 34853 (0x8825) is the GPSInfo IFD pointer.
        if exif and 34853 in exif:
            warnings.append("⚠️ GPS location data detected in image - consider removing EXIF data for privacy")
    except Exception:
        pass

    return True, "\n".join(warnings) if warnings else None


def generate_session_id():
    """Generate anonymous session ID for logging"""
    return hashlib.sha256(str(datetime.now()).encode()).hexdigest()[:16]


def content_policy_check(image):
    """Check if image content violates usage policies"""
    width, height = image.size

    if width < 100 or height < 100:
        return False, "Image too small - minimum 100x100 pixels required for meaningful reconstruction"

    return True, None
| | print("Loading GLPN model (lightweight)...") |
| | try: |
| | glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu") |
| | glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu") |
| | print("✓ GLPN model loaded successfully!") |
| | except Exception as e: |
| | print(f"Error loading model: {e}") |
| | glpn_processor = None |
| | glpn_model = None |
| |
|
| | |
| | dpt_model = None |
| | dpt_processor = None |
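

# Optional GPU placement. The helper below is a hedged sketch and is NOT wired
# into the pipeline; it only illustrates the standard PyTorch pattern. Tensors
# returned by the processor would also need `.to(device)` before inference.
def maybe_move_to_gpu(model):
    """Move a model to CUDA when available; returns (model, device)."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return (model.to(device), device) if model is not None else (model, device)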


def process_image(image, model_choice="GLPN (Recommended)", visualization_type="mesh"):
    """Optimized processing pipeline"""

    def _generate_quality_assessment(metrics):
        assessment = []
        outlier_pct = (metrics['outliers_removed'] / metrics['initial_points']) * 100

        if outlier_pct < 5:
            assessment.append("Very clean depth estimation")
        elif outlier_pct < 15:
            assessment.append("Good depth quality")
        else:
            assessment.append("High noise in depth estimation")

        if metrics['is_edge_manifold'] and metrics['is_vertex_manifold']:
            assessment.append("Excellent topology")
        elif metrics['is_vertex_manifold']:
            assessment.append("Good local topology")
        else:
            assessment.append("Topology issues present")

        if metrics['is_watertight']:
            assessment.append("Watertight mesh - ready for 3D printing!")
        else:
            assessment.append("Not watertight - needs repair for 3D printing")

        return "\n".join(f"- {item}" for item in assessment)

    if glpn_model is None:
        return None, None, None, "❌ Model failed to load. Please refresh the page.", None

    try:
        print("Starting reconstruction...")

        # Resize so both dimensions are multiples of 32, as the depth models expect.
        new_height = 480 if image.height > 480 else image.height
        new_height -= (new_height % 32)
        new_width = int(new_height * image.width / image.height)
        diff = new_width % 32
        new_width = new_width - diff if diff < 16 else new_width + (32 - diff)
        new_size = (new_width, new_height)
        image = image.resize(new_size, Image.LANCZOS)

        # Select the depth model; DPT is loaded lazily on first request.
        if model_choice == "GLPN (Recommended)":
            processor = glpn_processor
            model = glpn_model
        else:
            global dpt_model, dpt_processor
            if dpt_model is None:
                print("Loading DPT model (first time only)...")
                from transformers import DPTForDepthEstimation, DPTImageProcessor
                dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
                dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
                print("✓ DPT model loaded!")
            processor = dpt_processor
            model = dpt_model

        inputs = processor(images=image, return_tensors="pt")

        start_time = time.time()
        with torch.no_grad():
            outputs = model(**inputs)
            predicted_depth = outputs.predicted_depth
        depth_time = time.time() - start_time

        # Crop a 16-pixel border to drop unreliable depth near the image edges.
        pad = 16
        output = predicted_depth.squeeze().cpu().numpy() * 1000.0
        output = output[pad:-pad, pad:-pad]
        image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad))

        depth_height, depth_width = output.shape
        img_width, img_height = image_cropped.size

        if depth_height != img_height or depth_width != img_width:
            from scipy import ndimage
            zoom_factors = (img_height / depth_height, img_width / depth_width)
            output = ndimage.zoom(output, zoom_factors, order=1)

        image = image_cropped

        # Side-by-side visualization of the input and its estimated depth map.
        fig, ax = plt.subplots(1, 2, figsize=(14, 7))
        ax[0].imshow(image)
        ax[0].set_title('Original Image', fontsize=14, fontweight='bold')
        ax[0].axis('off')

        im = ax[1].imshow(output, cmap='plasma')
        ax[1].set_title('Estimated Depth Map', fontsize=14, fontweight='bold')
        ax[1].axis('off')
        plt.colorbar(im, ax=ax[1], fraction=0.046, pad=0.04)
        plt.tight_layout()

        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
        buf.seek(0)
        depth_viz = Image.open(buf)
        plt.close(fig)

        # Build a colored point cloud by back-projecting the depth map.
        width, height = image.size

        # Defensive re-alignment; shapes normally already match after cropping.
        if output.shape != (height, width):
            from scipy import ndimage
            zoom_factors = (height / output.shape[0], width / output.shape[1])
            output = ndimage.zoom(output, zoom_factors, order=1)

        depth_image = (output * 255 / np.max(output)).astype(np.uint8)
        image_array = np.array(image)

        depth_o3d = o3d.geometry.Image(depth_image)
        image_o3d = o3d.geometry.Image(image_array)
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            image_o3d, depth_o3d, convert_rgb_to_intensity=False
        )

        # Assumed pinhole intrinsics: 500 px focal length, principal point at center.
        camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
        camera_intrinsic.set_intrinsics(width, height, 500, 500, width / 2, height / 2)

        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
        initial_points = len(pcd.points)

        # Statistical outlier removal cleans up depth-estimation noise.
        cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
        pcd = pcd.select_by_index(ind)
        outliers_removed = initial_points - len(pcd.points)

        # Oriented normals are required by Poisson surface reconstruction.
        pcd.estimate_normals()
        pcd.orient_normals_to_align_with_direction()

        mesh_start = time.time()
        mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=9, n_threads=1
        )[0]

        # Color transfer: each mesh vertex takes the color of its nearest point.
        pcd_tree = o3d.geometry.KDTreeFlann(pcd)
        mesh_colors = []
        for vertex in mesh.vertices:
            [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1)
            mesh_colors.append(pcd.colors[idx[0]])
        mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors))

        # Flip the mesh upright (the camera convention leaves it upside down).
        rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
        mesh.rotate(rotation, center=(0, 0, 0))
        mesh_time = time.time() - mesh_start

        mesh.compute_vertex_normals()

        metrics = {
            'model_used': model_choice,
            'depth_estimation_time': f"{depth_time:.2f}s",
            'mesh_reconstruction_time': f"{mesh_time:.2f}s",
            'total_time': f"{depth_time + mesh_time:.2f}s",
            'initial_points': initial_points,
            'outliers_removed': outliers_removed,
            'final_points': len(pcd.points),
            'vertices': len(mesh.vertices),
            'triangles': len(mesh.triangles),
            'is_edge_manifold': mesh.is_edge_manifold(),
            'is_vertex_manifold': mesh.is_vertex_manifold(),
            'is_watertight': mesh.is_watertight(),
        }

        # Surface area: use Open3D's value, falling back to a manual computation.
        try:
            surface_area = mesh.get_surface_area()
            if surface_area > 0:
                metrics['surface_area'] = float(surface_area)
            else:
                vertices = np.asarray(mesh.vertices)
                triangles = np.asarray(mesh.triangles)
                v0 = vertices[triangles[:, 0]]
                v1 = vertices[triangles[:, 1]]
                v2 = vertices[triangles[:, 2]]
                cross = np.cross(v1 - v0, v2 - v0)
                areas = 0.5 * np.linalg.norm(cross, axis=1)
                metrics['surface_area'] = float(np.sum(areas))
        except Exception:
            metrics['surface_area'] = "Unable to compute"

        # Volume is only well-defined for watertight meshes.
        try:
            if mesh.is_watertight():
                metrics['volume'] = float(mesh.get_volume())
            else:
                metrics['volume'] = None
        except Exception:
            metrics['volume'] = None

        points = np.asarray(pcd.points)
        colors = np.asarray(pcd.colors)

        if visualization_type == "point_cloud":
            scatter = go.Scatter3d(
                x=points[:, 0], y=points[:, 1], z=points[:, 2],
                mode='markers',
                marker=dict(
                    size=2,
                    color=['rgb({},{},{})'.format(int(r * 255), int(g * 255), int(b * 255))
                           for r, g, b in colors],
                ),
                name='Point Cloud'
            )

            plotly_fig = go.Figure(data=[scatter])
            plotly_fig.update_layout(
                scene=dict(
                    xaxis=dict(visible=False),
                    yaxis=dict(visible=False),
                    zaxis=dict(visible=False),
                    aspectmode='data'
                ),
                height=700,
                title="Point Cloud"
            )
        else:
            vertices = np.asarray(mesh.vertices)
            triangles = np.asarray(mesh.triangles)

            if mesh.has_vertex_colors():
                vertex_colors = np.asarray(mesh.vertex_colors)
                colors_rgb = ['rgb({},{},{})'.format(int(r * 255), int(g * 255), int(b * 255))
                              for r, g, b in vertex_colors]

                mesh_trace = go.Mesh3d(
                    x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
                    i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
                    vertexcolor=colors_rgb,
                    opacity=0.95
                )
            else:
                mesh_trace = go.Mesh3d(
                    x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
                    i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
                    color='lightblue',
                    opacity=0.9
                )

            plotly_fig = go.Figure(data=[mesh_trace])
            plotly_fig.update_layout(
                scene=dict(
                    xaxis=dict(visible=False),
                    yaxis=dict(visible=False),
                    zaxis=dict(visible=False),
                    aspectmode='data'
                ),
                height=700,
                title="3D Mesh"
            )

        # Export point cloud, mesh (PLY/OBJ/STL), and metrics into a ZIP package.
        temp_dir = tempfile.mkdtemp()

        pcd_path = Path(temp_dir) / "point_cloud.ply"
        o3d.io.write_point_cloud(str(pcd_path), pcd)

        mesh_path = Path(temp_dir) / "mesh.ply"
        o3d.io.write_triangle_mesh(str(mesh_path), mesh)

        mesh_obj_path = Path(temp_dir) / "mesh.obj"
        o3d.io.write_triangle_mesh(str(mesh_obj_path), mesh)

        mesh_stl_path = Path(temp_dir) / "mesh.stl"
        o3d.io.write_triangle_mesh(str(mesh_stl_path), mesh)

        metrics_path = Path(temp_dir) / "metrics.json"
        with open(metrics_path, 'w') as f:
            json.dump(metrics, f, indent=2, default=str)

        zip_path = Path(temp_dir) / "reconstruction_complete.zip"
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(pcd_path, pcd_path.name)
            zipf.write(mesh_path, mesh_path.name)
            zipf.write(mesh_obj_path, mesh_obj_path.name)
            zipf.write(mesh_stl_path, mesh_stl_path.name)
            zipf.write(metrics_path, metrics_path.name)

        assessment = _generate_quality_assessment(metrics)

        report = f"""
## Reconstruction Complete!

### Performance
- **Processing Time**: {metrics['total_time']}
- **Points**: {metrics['final_points']:,}
- **Triangles**: {metrics['triangles']:,}

### Quality
- **Topology**: {'Good' if metrics['is_vertex_manifold'] else 'Issues'}
- **Watertight**: {'Yes' if metrics['is_watertight'] else 'No'}

### Assessment
{assessment}

**Download the complete package below!**
"""

        return depth_viz, plotly_fig, str(zip_path), report, json.dumps(metrics, indent=2, default=str)

    except Exception as e:
        import traceback
        return None, None, None, f"Error: {str(e)}\n\n{traceback.format_exc()}", None


def process_image_with_safeguards(image, model_choice="GLPN (Recommended)", visualization_type="mesh", consent_given=False):
    """Main processing with safeguards"""
    session_id = generate_session_id()

    if not consent_given:
        return None, None, None, "**You must agree to the Responsible Use Guidelines first.**", None

    if image is None:
        return None, None, None, "Please upload an image first.", None

    is_safe, safety_warning = check_image_safety(image)
    passes_policy, policy_message = content_policy_check(image)

    if not passes_policy:
        return None, None, None, f"{policy_message}", None

    try:
        result = process_image(image, model_choice, visualization_type)
        depth_viz, plotly_fig, zip_path, report, json_metrics = result

        if safety_warning:
            report = f"**Privacy Notice:**\n{safety_warning}\n\n{report}"

        # Attach anonymous session metadata (no image content is logged). Skip
        # this when process_image returned an error and json_metrics is None.
        if json_metrics:
            metrics = json.loads(json_metrics)
            metrics['responsible_ai'] = {
                'session_id': session_id,
                'timestamp': datetime.now().isoformat(),
                'consent_given': True
            }
            json_metrics = json.dumps(metrics, indent=2)

        return depth_viz, plotly_fig, zip_path, report, json_metrics

    except Exception as e:
        return None, None, None, f"Error: {str(e)}", None


with gr.Blocks(title="Responsible AI 3D Reconstruction", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🏗️ 3D Reconstruction from Single Images

    Transform 2D photographs into 3D spatial models

    <div style="background-color: #fff3cd; border: 2px solid #ffc107; padding: 15px; border-radius: 5px; margin: 10px 0;">
    <h3 style="color: #856404; margin-top: 0;">⚠️ Responsible Use Required</h3>
    <p style="color: #856404; margin-bottom: 0;">This tool must be used ethically and legally. Review the guidelines in the <b>first tab</b>.</p>
    </div>
    """)

    with gr.Tabs():

        with gr.Tab("⚠️ Responsible Use (READ FIRST)"):
            gr.Markdown(RESPONSIBLE_AI_NOTICE)
            gr.Markdown("""
            ### Known Limitations & Biases
            - Trained primarily on Western indoor architecture
            - May underperform on non-Western styles
            - Scale is relative, not absolute
            - Single viewpoint captures only visible surfaces
            """)

        with gr.Tab("Reconstruction"):
            consent_checkbox = gr.Checkbox(
                label="I have read and agree to the Responsible Use Guidelines",
                value=False
            )

            with gr.Row():
                with gr.Column(scale=1):
                    input_image = gr.Image(
                        type="pil",
                        label="Upload Image",
                        sources=["upload", "clipboard"]
                    )

                    model_choice = gr.Radio(
                        choices=["GLPN (Recommended)", "DPT (High Quality)"],
                        value="GLPN (Recommended)",
                        label="Depth Estimation Model"
                    )

                    visualization_type = gr.Radio(
                        choices=["mesh", "point_cloud"],
                        value="mesh",
                        label="Visualization Type"
                    )

                    reconstruct_btn = gr.Button("Start Reconstruction", variant="primary", size="lg")

                with gr.Column(scale=2):
                    depth_output = gr.Image(label="Depth Map")
                    viewer_3d = gr.Plot(label="Interactive 3D Viewer")

            with gr.Row():
                with gr.Column():
                    metrics_output = gr.Markdown(label="Report")
                with gr.Column():
                    json_output = gr.Textbox(label="Metrics (JSON)", lines=8)

            download_output = gr.File(label="Download Package (ZIP)")

            reconstruct_btn.click(
                fn=process_image_with_safeguards,
                inputs=[input_image, model_choice, visualization_type, consent_checkbox],
                outputs=[depth_output, viewer_3d, download_output, metrics_output, json_output]
            )

        with gr.Tab("Theory & Background"):
            gr.Markdown("""
            ## About This Tool

            This application demonstrates how artificial intelligence can convert single 2D photographs
            into interactive 3D models automatically.

            ### What Makes This Special

            **Traditional Approach:**
            - Needs special equipment (3D scanner, multiple cameras)
            - Requires technical expertise
            - Time-consuming process
            - Expensive

            **This AI Approach:**
            - Works with any single photograph
            - No special equipment needed
            - Automatic processing
            - Free and accessible

            ## The Technology

            ### AI Model Used: GLPN

            **GLPN (Global-Local Path Networks)**
            - Paper: Kim et al., CVPR 2022
            - Optimized for: Indoor/outdoor architectural scenes
            - Training: NYU Depth V2 (urban indoor environments)
            - Best for: Building interiors, street-level views
            - Speed: Fast (~0.3-2.5s)

            ### How It Works (Simplified)

            1. **AI analyzes photo** → Recognizes objects, patterns, perspective
            2. **Estimates distance** → Figures out what's close, what's far
            3. **Creates 3D points** → Places colored dots in 3D space
            4. **Builds surface** → Connects dots into smooth shape
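
            Here is the depth step in miniature (a hedged sketch using the same Hugging Face
            `transformers` API this app calls; `example.jpg` is a placeholder path):

            ```python
            from transformers import GLPNImageProcessor, GLPNForDepthEstimation
            from PIL import Image
            import torch

            # Load the same model this app uses (weights download on first run)
            processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
            model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

            image = Image.open("example.jpg")  # placeholder path
            inputs = processor(images=image, return_tensors="pt")
            with torch.no_grad():
                depth = model(**inputs).predicted_depth  # (1, H, W) relative depth map
            ```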

            ### Spatial Data Pipeline

            **1. Monocular Depth Estimation**
            - Challenge: Extracting 3D spatial information from 2D photographs
            - Application: Similar to photogrammetry, but from a single image
            - Output: Relative depth maps for spatial analysis

            **2. Point Cloud Generation**
            - Creates 3D coordinates (X, Y, Z) from pixels (see the sketch below)
            - Each point: Spatial location + RGB color information
            - Compatible with: GIS software, CAD tools, spatial databases

            **3. 3D Mesh Generation**
            - Creates a continuous surface from discrete points
            - Similar to: Digital terrain models (DTMs) for buildings
            - Output formats: Compatible with ArcGIS, QGIS, SketchUp
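
            Point cloud generation boils down to back-projecting each pixel through a pinhole
            camera model. A minimal NumPy sketch; the 500 px focal length and centered
            principal point mirror the values this app hard-codes:

            ```python
            import numpy as np

            def backproject(depth, fx=500.0, fy=500.0):
                # depth: (H, W) array of depth values; returns (H, W, 3) XYZ points
                h, w = depth.shape
                cx, cy = w / 2.0, h / 2.0                # principal point at the image center
                u, v = np.meshgrid(np.arange(w), np.arange(h))
                x = (u - cx) * depth / fx                # pinhole model: X = (u - cx) * Z / fx
                y = (v - cy) * depth / fy
                return np.stack([x, y, depth], axis=-1)
            ```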

            ### Quality Metrics Explained

            - **Point Cloud Density**: More points = better spatial resolution
            - **Geometric Accuracy**: Manifold checks ensure valid topology
            - **Surface Continuity**: Watertight meshes allow complete volume calculations
            - **Data Fidelity**: Triangle count indicates level of detail
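
            You can verify these flags yourself on a downloaded mesh. A minimal Open3D sketch
            (`mesh.ply` is a placeholder for a file from the download package):

            ```python
            import open3d as o3d

            mesh = o3d.io.read_triangle_mesh("mesh.ply")   # placeholder path
            print("edge manifold:", mesh.is_edge_manifold())
            print("vertex manifold:", mesh.is_vertex_manifold())
            print("watertight:", mesh.is_watertight())
            ```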

            ### Limitations for Geographic Applications

            1. **Scale Ambiguity**: Requires ground control points for absolute measurements (see the sketch after this list)
            2. **Single Viewpoint**: Cannot capture occluded facades or hidden spaces
            3. **No Georeferencing**: Outputs are in local coordinates, not global (lat/lon)
            4. **Weather Dependent**: Best results with clear, well-lit conditions
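
            Because scale is only relative, a single known real-world distance is enough to
            rescale an output uniformly. A hedged sketch (the picked points and the 2.0 m
            ground-truth distance are hypothetical):

            ```python
            import numpy as np
            import open3d as o3d

            mesh = o3d.io.read_triangle_mesh("mesh.ply")    # placeholder path
            p0 = np.array([0.10, 0.00, 1.00])               # hypothetical picked point (model units)
            p1 = np.array([0.60, 0.00, 1.00])               # hypothetical second point
            known_metres = 2.0                              # hypothetical measured distance
            factor = known_metres / np.linalg.norm(p1 - p0)
            mesh.scale(factor, center=mesh.get_center())    # uniform rescale to metres
            ```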

            ### Comparison with Traditional Methods

            **vs. Terrestrial Laser Scanning (TLS):**
            - Much cheaper, faster, more accessible
            - Lower accuracy, no absolute scale

            **vs. Photogrammetry (Structure-from-Motion):**
            - Works with a single image, faster processing
            - Less accurate, cannot resolve scale

            **vs. LiDAR:**
            - Much lower cost, consumer cameras sufficient
            - Lower precision, no absolute measurements

            ## Reconstruction Pipeline (10 Steps)

            1. **Image Preprocessing**: Resize to model requirements
            2. **Depth Estimation**: Neural network inference
            3. **Depth Visualization**: Create comparison images
            4. **Point Cloud Generation**: Back-project using camera model
            5. **Outlier Removal**: Statistical filtering
            6. **Normal Estimation**: Surface orientation calculation
            7. **Mesh Reconstruction**: Poisson surface reconstruction (see the sketch after this list)
            8. **Quality Metrics**: Compute geometric measures
            9. **3D Visualization**: Create interactive viewer
            10. **File Export**: Generate multiple formats
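
            Steps 5-7 in miniature, using the same Open3D calls as this app
            (`point_cloud.ply` is a placeholder path):

            ```python
            import open3d as o3d

            pcd = o3d.io.read_point_cloud("point_cloud.ply")   # placeholder path
            pcd, _ = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
            pcd.estimate_normals()                             # Poisson needs oriented normals
            pcd.orient_normals_to_align_with_direction()
            mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=9)
            ```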

            ### Key References

            1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation
               with Vertical CutDepth." *CVPR 2022*
            2. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction."
               *Eurographics Symposium on Geometry Processing*
            """)

        with gr.Tab("Usage Guide"):
            gr.Markdown("""
            ## How to Use This Application

            ### Step 1: Read the Responsible Use Guidelines
            - **REQUIRED**: Review the "Responsible Use" tab first
            - Understand privacy implications
            - Acknowledge model limitations and biases
            - Ensure you have rights to use source images

            ### Step 2: Prepare Your Image

            **Best Practices:**
            - Remove EXIF metadata (GPS, timestamps) for privacy (see the sketch below)
            - Ensure you have consent if the image contains people
            - Use well-lit, clear photographs
            - Recommended resolution: 512-1024 pixels
            - Indoor scenes work best

            **Privacy Checklist:**
            - [ ] No identifiable people (or consent obtained)
            - [ ] No sensitive/private locations
            - [ ] EXIF data removed
            - [ ] You own rights to the image
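
            One way to strip EXIF metadata before uploading (a minimal Pillow sketch;
            `photo.jpg` is a placeholder path):

            ```python
            from PIL import Image

            img = Image.open("photo.jpg")          # placeholder path
            clean = Image.new(img.mode, img.size)
            clean.putdata(list(img.getdata()))     # copies pixels only; metadata is dropped
            clean.save("photo_clean.jpg")
            ```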

            ### Step 3: Upload Image
            - Click the "Upload Image" area
            - Select a JPG, PNG, or BMP file
            - **Note:** The webcam option is removed for privacy protection
            - You can also paste from the clipboard

            ### Step 4: Check the Consent Box
            - Check "I have read and agree to the Responsible Use Guidelines"
            - This confirms you've reviewed the ethical guidelines
            - Processing won't start without consent

            ### Step 5: Choose Visualization
            - **Mesh**: Solid 3D surface (recommended)
            - **Point Cloud**: Individual 3D points with colors

            ### Step 6: Start Reconstruction
            - Click "Start Reconstruction"
            - Processing takes 10-60 seconds
            - All processing is local (no cloud upload)

            ### Step 7: Explore Results

            **Depth Map:**
            - Yellow/Red = Farther objects
            - Purple/Blue = Closer objects
            - Shows the AI's depth understanding

            **3D Viewer:**
            - Rotate: Click and drag
            - Zoom: Scroll wheel
            - Pan: Right-click and drag
            - Reset: Double-click

            **Metrics Report:**
            - Processing performance
            - Quality indicators
            - Topology validation

            ### Step 8: Download Files
            - The ZIP package contains:
              - Point cloud (PLY)
              - Mesh (PLY, OBJ, STL)
              - Quality metrics (JSON)
            - All files include responsible AI metadata

            ## Viewing Downloaded 3D Files

            ### Free Software Options:

            **MeshLab** (Recommended for beginners)
            - Download: https://www.meshlab.net/
            - Opens PLY, OBJ, STL files
            - Great for viewing and basic editing

            **Blender** (For advanced users)
            - Download: https://www.blender.org/
            - Import → Wavefront (.obj) or PLY
            - Full 3D modeling and rendering capabilities

            **CloudCompare** (For point clouds)
            - Download: https://www.cloudcompare.org/
            - Best for analyzing point cloud data
            - Measurement and analysis tools

            **Online Viewers** (No installation)
            - https://3dviewer.net/
            - https://www.creators3d.com/online-viewer
            - Just drag and drop your OBJ/PLY file

            ## Tips for Best Results

            ### DO:
            - Use well-lit images
            - Include depth cues (corners, edges)
            - Indoor scenes work best
            - Medium resolution (512-1024px)
            - Remove personal metadata
            - Obtain consent for people in images

            ### AVOID:
            - Motion blur or low resolution
            - Reflective surfaces (mirrors, glass)
            - Images without consent
            - Private property without permission
            - Surveillance or monitoring purposes
            - Heavy shadows or darkness

            ## Understanding the Metrics

            ### Point Cloud Statistics:
            - **Initial Points**: Raw points generated from depth
            - **Outliers Removed**: Noisy points filtered out (typically 5-15%)
            - **Final Points**: Clean points used for mesh generation

            ### Mesh Quality Indicators:
            - **Edge Manifold**: Each edge connects exactly 2 faces (good topology)
            - **Vertex Manifold**: Clean vertex connections
            - **Watertight**: No holes, ready for 3D printing
            - **⚠️ marks**: Indicate potential issues (still usable, may need repair)

            ### Processing Times:
            - **Depth Estimation**: 0.3-2.5s (GLPN model)
            - **Mesh Reconstruction**: 2-10s (depends on point cloud size)
            - **Total Time**: Usually 10-60 seconds

            ---

            ## Troubleshooting

            **Problem: No output appears**
            - Check the browser console for errors
            - Try refreshing the page
            - Try a smaller/simpler image first
            - Check that the image uploaded successfully

            **Problem: Mesh has holes or artifacts**
            - This is normal for single-view reconstruction
            - Hidden surfaces cannot be reconstructed
            - Use mesh repair tools in MeshLab if needed

            **Problem: Colors look wrong on the mesh**
            - Vertex color interpolation is approximate
            - This is expected behavior
            - Colors on the point cloud are more accurate

            **Problem: Processing is very slow**
            - Use smaller images
            - This is normal on CPU (GPU is much faster)

            **Problem: "Not watertight" in metrics**
            - Common for complex scenes
            - Still usable for visualization
            - For 3D printing: use mesh repair in MeshLab
            """)

        with gr.Tab("Ethics & Impact"):
            gr.Markdown("""
            ## Algorithmic Bias & Fairness

            ### Training Data Representation

            **Geographic Bias:**
            - Heavy representation: North America, Europe
            - Underrepresented: Africa, South Asia, Pacific Islands
            - Impact: Lower accuracy for non-Western architecture

            **Architectural Style Bias:**
            - Well-represented: Modern interiors, Western buildings
            - Underrepresented: Traditional, vernacular, indigenous structures
            - Impact: May misinterpret non-standard spatial layouts

            **Socioeconomic Bias:**
            - Training data skewed toward middle/upper-class interiors
            - Limited representation of informal settlements
            - May not generalize well to all socioeconomic contexts

            ### Potential Harms

            **Privacy Violations:**
            - Unauthorized 3D reconstruction of private spaces
            - Creating models of individuals without consent
            - Surveillance and tracking applications

            **Misinformation:**
            - Generating fake 3D evidence
            - Manipulating spatial understanding
            - Creating misleading visualizations

            **Property Rights:**
            - Unauthorized documentation of copyrighted designs
            - Intellectual property theft
            - Commercial exploitation without permission

            ### Harm Prevention

            1. **Mandatory consent**: Require user acknowledgment
            2. **Use case restriction**: Prohibit surveillance and deceptive uses
            3. **Privacy protection**: Disable webcam, encourage EXIF removal
            4. **Transparency**: Clear documentation of limitations

            ## Accountability & Governance

            ### User Responsibilities

            As a user, you are responsible for:
            - Ensuring lawful use of source images
            - Obtaining necessary consents and permissions
            - Respecting privacy and intellectual property
            - Using outputs ethically and transparently
            - Understanding and accounting for model biases

            ### Developer Responsibilities

            This tool implements:
            - Clear responsible use guidelines
            - Privacy-protective design (no webcam, local processing)
            - Bias documentation and transparency
            - Prohibited use cases explicitly stated

            ## Future Directions

            ### Improving Fairness
            - Train on more diverse geographic datasets
            - Include underrepresented architectural styles
            - Develop bias mitigation techniques
            - Community-driven model evaluation

            ### Enhancing Privacy
            - Face/person detection and redaction
            - Automatic EXIF stripping
            - Differential privacy techniques
            """)

        with gr.Tab("Citation"):
            gr.Markdown("""
            ## Academic Citation

            ### For GLPN Model:
            ```bibtex
            @inproceedings{kim2022global,
              title={Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth},
              author={Kim, Doyeon and Ga, Woonghyun and Ahn, Pyungwhan and Joo, Donggyu and Chun, Sehwan and Kim, Junmo},
              booktitle={CVPR},
              year={2022}
            }
            ```

            ### For Poisson Surface Reconstruction:
            ```bibtex
            @inproceedings{kazhdan2006poisson,
              title={Poisson Surface Reconstruction},
              author={Kazhdan, Michael and Bolitho, Matthew and Hoppe, Hugues},
              booktitle={Symposium on Geometry Processing},
              year={2006}
            }
            ```

            ## Open Source Components

            This application is built with:

            - **Transformers** (Hugging Face): Model inference framework
            - **Open3D**: Point cloud and mesh processing
            - **PyTorch**: Deep learning framework
            - **Plotly**: Interactive 3D visualization
            - **Gradio**: Web interface framework
            - **NumPy** & **SciPy**: Numerical computing
            - **Matplotlib**: Data visualization
            - **Pillow (PIL)**: Image processing

            ## Model Credits

            **GLPN Model:**
            - Developed by: KAIST (Korea Advanced Institute of Science and Technology)
            - Hosted by: Hugging Face (vinvino02/glpn-nyu)
            - License: Apache 2.0

            ## Responsible AI Features

            This implementation includes:
            - Privacy-protective design (no webcam option)
            - Mandatory consent acknowledgment
            - Bias documentation and transparency
            - Ethical use guidelines
            """)

    gr.Markdown("""
    ---

    **Version:** 2.0 (Responsible AI Edition - Optimized)
    **Last Updated:** 2025
    **License:** Educational and Research Use
    """)


if __name__ == "__main__":
    print("=" * 60)
    print("RESPONSIBLE AI 3D RECONSTRUCTION")
    print("=" * 60)
    print("✓ Lightweight model (GLPN only)")
    print("✓ No webcam option")
    print("✓ Local processing")
    print("✓ Consent required")
    print("=" * 60)
    demo.launch(share=True)