Spaces:

mac9087
/

rightnight

Sleeping

App Files Files Community

mac9087 committed on Apr 24, 2025

Commit

bf928c6

verified ·

1 Parent(s): 8d0a7e9

Update app.py

Browse files

Files changed (1) hide show

app.py +183 -230

app.py CHANGED Viewed

@@ -4,8 +4,6 @@ import time
 import threading
 import json
 import gc
-import numpy as np
-import trimesh
 from flask import Flask, request, jsonify, send_file, Response, stream_with_context
 from werkzeug.utils import secure_filename
 from PIL import Image
@@ -15,9 +13,12 @@ import uuid
 import traceback
 from huggingface_hub import snapshot_download
 from flask_cors import CORS
-from scipy.ndimage import gaussian_filter
 import cv2
-from transformers import pipeline, AutoFeatureExtractor, AutoModelForDepthEstimation
 app = Flask(__name__)
 CORS(app)  # Enable CORS for all routes
@@ -45,12 +46,12 @@ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max
 processing_jobs = {}
 # Global model variables
-depth_model = None
 model_loaded = False
 model_loading = False
 # Configuration for processing
-TIMEOUT_SECONDS = 300  # 5 minutes max for processing
 MAX_DIMENSION = 512    # Max image dimension to process
 # TimeoutError for handling timeouts
@@ -134,23 +135,24 @@ def preprocess_image(image_path):
         return img
 def load_model():
-    global depth_model, model_loaded, model_loading
     if model_loaded:
-        return depth_model
     if model_loading:
         # Wait for model to load if it's already in progress
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return depth_model
     try:
         model_loading = True
         print("Starting model loading...")
-        # Using MiDaS model which provides better depth estimation
-        model_name = "Intel/dpt-hybrid-midas"
         # Download model with retry mechanism
         max_retries = 3
@@ -158,26 +160,11 @@ def load_model():
         for attempt in range(max_retries):
             try:
-                feature_extractor = AutoFeatureExtractor.from_pretrained(
-                    model_name,
-                    cache_dir=CACHE_DIR
                 )
-                model = AutoModelForDepthEstimation.from_pretrained(
-                    model_name,
-                    cache_dir=CACHE_DIR
-                )
-                # Check device availability
-                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                model = model.to(device)
-                # Create depth estimator object
-                depth_model = {
-                    "feature_extractor": feature_extractor,
-                    "model": model,
-                    "device": device
-                }
                 break
             except Exception as e:
                 if attempt < max_retries - 1:
@@ -187,13 +174,24 @@ def load_model():
                 else:
                     raise
         # Optimize memory usage
-        if device == torch.device("cuda"):
             torch.cuda.empty_cache()
         model_loaded = True
         print(f"Model loaded successfully on {device}")
-        return depth_model
     except Exception as e:
         print(f"Error loading model: {str(e)}")
@@ -202,39 +200,7 @@ def load_model():
     finally:
         model_loading = False
-# Enhanced depth estimation function
-def estimate_depth(image, model):
-    # Extract features and run through model
-    feature_extractor = model["feature_extractor"]
-    depth_model = model["model"]
-    device = model["device"]
-    if isinstance(image, Image.Image):
-        # Convert PIL image to numpy if needed
-        image_np = np.array(image)
-    else:
-        image_np = image
-    # Process with feature extractor
-    inputs = feature_extractor(images=image_np, return_tensors="pt")
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-    # Run inference
-    with torch.no_grad():
-        outputs = depth_model(**inputs)
-        predicted_depth = outputs.predicted_depth
-    # Convert to numpy
-    depth_map = predicted_depth.squeeze().cpu().numpy()
-    # Normalize depth map to 0-1 range
-    depth_min = depth_map.min()
-    depth_max = depth_map.max()
-    depth_map = (depth_map - depth_min) / (depth_max - depth_min)
-    return depth_map
-# Enhanced depth map processing to improve detail quality
 def enhance_depth_map(depth_map, detail_level='medium'):
     """Apply sophisticated processing to enhance depth map details"""
     # Convert to numpy array if needed
@@ -257,12 +223,16 @@ def enhance_depth_map(depth_map, detail_level='medium'):
     # Apply different enhancement methods based on detail level
     if detail_level == 'high':
-        # Apply unsharp masking for edge enhancement
         blurred = gaussian_filter(enhanced_depth, sigma=1.5)
         mask = enhanced_depth - blurred
         enhanced_depth = enhanced_depth + 1.5 * mask
         # Apply bilateral filter to preserve edges while smoothing noise
         smooth1 = gaussian_filter(enhanced_depth, sigma=0.5)
         smooth2 = gaussian_filter(enhanced_depth, sigma=2.0)
         edge_mask = enhanced_depth - smooth2
@@ -270,6 +240,7 @@ def enhance_depth_map(depth_map, detail_level='medium'):
     elif detail_level == 'medium':
         # Less aggressive but still effective enhancement
         blurred = gaussian_filter(enhanced_depth, sigma=1.0)
         mask = enhanced_depth - blurred
         enhanced_depth = enhanced_depth + 0.8 * mask
@@ -286,71 +257,65 @@ def enhance_depth_map(depth_map, detail_level='medium'):
     return enhanced_depth
-# New function to infer a complete 3D model
-def create_complete_3d_model(depth_map, image, resolution=100, detail_level='medium'):
-    """Creates a full 3D model with front, sides, and back from a single depth map"""
-    # Enhanced depth map
     enhanced_depth = enhance_depth_map(depth_map, detail_level)
-    # Get dimensions
     h, w = enhanced_depth.shape
-    # Create base grid for the front face
-    x = np.linspace(-1, 1, resolution)
-    y = np.linspace(-1, 1, resolution)
     x_grid, y_grid = np.meshgrid(x, y)
-    # Create a 3D box vertices list (all 8 corners of a box)
-    thickness = 0.5  # Thickness of the model
-    # Create a mesh with all 6 sides
-    vertices = []
-    faces = []
-    # For texture coordinates
-    img_array = np.array(image)
-    vertex_colors = []
-    # 1. Create front face (existing depth-based approach)
-    front_vertices_count = resolution * resolution
-    # Bilinear interpolation of the depth map
-    from scipy import interpolate
-    interp_func = interpolate.RectBivariateSpline(np.linspace(0, 1, h), np.linspace(0, 1, w), enhanced_depth)
-    interp_y = np.linspace(0, 1, resolution)
-    interp_x = np.linspace(0, 1, resolution)
-    z_values = interp_func(interp_y, interp_x)
-    # Scale depth values for better visualization
-    depth_scale = 1.0
     if detail_level == 'high':
-        depth_scale = 1.2
-    elif detail_level == 'low':
-        depth_scale = 0.8
-    # Add front face vertices with actual depth values
-    for i in range(resolution):
-        for j in range(resolution):
-            # X and Y are grid coordinates, Z is from depth map
-            vx = x_grid[i, j]
-            vy = y_grid[i, j]
-            vz = -depth_scale * z_values[i, j]  # Negative because depth is into the screen
-            vertices.append([vx, vy, vz])
-            # Add vertex colors from the original image
-            img_y = int(i * (img_array.shape[0] - 1) / (resolution - 1))
-            img_x = int(j * (img_array.shape[1] - 1) / (resolution - 1))
-            if len(img_array.shape) == 3 and img_array.shape[2] >= 3:
-                color = [img_array[img_y, img_x, 0], img_array[img_y, img_x, 1], img_array[img_y, img_x, 2], 255]
-            else:
-                # Grayscale
-                gray = img_array[img_y, img_x]
-                color = [gray, gray, gray, 255]
-            vertex_colors.append(color)
-    # Add front face triangles
     for i in range(resolution-1):
         for j in range(resolution-1):
             p1 = i * resolution + j
@@ -358,119 +323,103 @@ def create_complete_3d_model(depth_map, image, resolution=100, detail_level='med
             p3 = (i + 1) * resolution + j
             p4 = (i + 1) * resolution + (j + 1)
-            # Two triangles per grid cell
-            faces.append([p1, p2, p4])
-            faces.append([p1, p4, p3])
-    # 2. Create back face (offset from front face)
-    back_depth = -1.0  # Fixed back depth
-    # Add back face vertices
-    back_start_idx = len(vertices)
-    for i in range(resolution):
-        for j in range(resolution):
-            vx = x_grid[i, j]
-            vy = y_grid[i, j]
-            vz = back_depth
-            vertices.append([vx, vy, vz])
-            # Use darkened version of front face color for back
-            front_color = vertex_colors[i * resolution + j].copy()
-            # Darken color for back face
-            darkened = [int(c * 0.7) for c in front_color[:3]] + [front_color[3]]
-            vertex_colors.append(darkened)
-    # Add back face triangles (reverse winding)
-    for i in range(resolution-1):
-        for j in range(resolution-1):
-            p1 = back_start_idx + i * resolution + j
-            p2 = back_start_idx + i * resolution + (j + 1)
-            p3 = back_start_idx + (i + 1) * resolution + j
-            p4 = back_start_idx + (i + 1) * resolution + (j + 1)
-            # Reverse winding order for back face
-            faces.append([p1, p4, p2])
-            faces.append([p1, p3, p4])
-    # 3. Create side faces (connecting front to back)
-    # Top side
-    for j in range(resolution-1):
-        # Front edge vertices
-        f1 = j
-        f2 = j + 1
-        # Back edge vertices
-        b1 = back_start_idx + j
-        b2 = back_start_idx + j + 1
-        faces.append([f1, b1, b2])
-        faces.append([f1, b2, f2])
-    # Bottom side
-    bottom_row = (resolution - 1) * resolution
-    for j in range(resolution-1):
-        # Front edge vertices
-        f1 = bottom_row + j
-        f2 = bottom_row + j + 1
-        # Back edge vertices
-        b1 = back_start_idx + bottom_row + j
-        b2 = back_start_idx + bottom_row + j + 1
-        faces.append([f1, f2, b2])
-        faces.append([f1, b2, b1])
-    # Left side
-    for i in range(resolution-1):
-        # Front edge vertices
-        f1 = i * resolution
-        f2 = (i + 1) * resolution
-        # Back edge vertices
-        b1 = back_start_idx + i * resolution
-        b2 = back_start_idx + (i + 1) * resolution
-        faces.append([f1, b1, b2])
-        faces.append([f1, b2, f2])
-    # Right side
-    right_col = resolution - 1
-    for i in range(resolution-1):
-        # Front edge vertices
-        f1 = i * resolution + right_col
-        f2 = (i + 1) * resolution + right_col
-        # Back edge vertices
-        b1 = back_start_idx + i * resolution + right_col
-        b2 = back_start_idx + (i + 1) * resolution + right_col
-        faces.append([f1, f2, b2])
-        faces.append([f1, b2, b1])
-    # Convert to numpy arrays
-    vertices = np.array(vertices)
     faces = np.array(faces)
-    vertex_colors = np.array(vertex_colors)
     # Create mesh
-    mesh = trimesh.Trimesh(
-        vertices=vertices,
-        faces=faces,
-        vertex_colors=vertex_colors,
-        process=True
-    )
-    # Fix normals to point outward
-    mesh.fix_normals()
-    # Add smoothing for better visual quality if not high detail
     if detail_level != 'high':
         mesh = mesh.smoothed(method='laplacian', iterations=1)
     return mesh
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "Advanced 3D Model Generator with Complete Structure",
         "device": "cuda" if torch.cuda.is_available() else "cpu"
     }), 200
@@ -531,7 +480,7 @@ def convert_image_to_3d():
         mesh_resolution = min(int(request.form.get('mesh_resolution', 100)), 200)  # Limit max resolution
         output_format = request.form.get('output_format', 'obj').lower()
         detail_level = request.form.get('detail_level', 'medium').lower()  # Parameter for detail level
-        completeness = request.form.get('completeness', 'full').lower()  # New parameter for model completeness
     except ValueError:
         return jsonify({"error": "Invalid parameter values"}), 400
@@ -588,11 +537,22 @@ def convert_image_to_3d():
             # Process image with thread-safe timeout
             try:
-                def run_depth_estimation():
                     # Get depth map
-                    return estimate_depth(image, model)
-                depth_map, error = process_with_timeout(run_depth_estimation, [], TIMEOUT_SECONDS)
                 if error:
                     if isinstance(error, TimeoutError):
@@ -604,9 +564,9 @@ def convert_image_to_3d():
                 processing_jobs[job_id]['progress'] = 60
-                # Create complete 3D model with front, sides and back
                 mesh_resolution_int = int(mesh_resolution)
-                mesh = create_complete_3d_model(depth_map, image, resolution=mesh_resolution_int, detail_level=detail_level)
                 processing_jobs[job_id]['progress'] = 80
             except Exception as e:
@@ -718,13 +678,6 @@ def download_model(job_id):
             return send_file(glb_path, as_attachment=True, download_name="model.glb")
     return jsonify({"error": "File not found"}), 404
 @app.route('/preview/<job_id>', methods=['GET'])
 def preview_model(job_id):

 import threading
 import json
 import gc
 from flask import Flask, request, jsonify, send_file, Response, stream_with_context
 from werkzeug.utils import secure_filename
 from PIL import Image
 import traceback
 from huggingface_hub import snapshot_download
 from flask_cors import CORS
+import numpy as np
+import trimesh
+from transformers import pipeline
+from scipy.ndimage import gaussian_filter, uniform_filter, median_filter
+from scipy import interpolate
 import cv2
 app = Flask(__name__)
 CORS(app)  # Enable CORS for all routes
 processing_jobs = {}
 # Global model variables
+depth_estimator = None
 model_loaded = False
 model_loading = False
 # Configuration for processing
+TIMEOUT_SECONDS = 240  # 4 minutes max for processing
 MAX_DIMENSION = 512    # Max image dimension to process
 # TimeoutError for handling timeouts
         return img
 def load_model():
+    global depth_estimator, model_loaded, model_loading
     if model_loaded:
+        return depth_estimator
     if model_loading:
         # Wait for model to load if it's already in progress
         while model_loading and not model_loaded:
             time.sleep(0.5)
+        return depth_estimator
     try:
         model_loading = True
         print("Starting model loading...")
+        # Using DPT-Large which provides better detail than DPT-Hybrid
+        # Alternatively, consider "vinvino02/glpn-nyu" for different detail characteristics
+        model_name = "Intel/dpt-large"
         # Download model with retry mechanism
         max_retries = 3
         for attempt in range(max_retries):
             try:
+                snapshot_download(
+                    repo_id=model_name,
+                    cache_dir=CACHE_DIR,
+                    resume_download=True,
                 )
                 break
             except Exception as e:
                 if attempt < max_retries - 1:
                 else:
                     raise
+        # Initialize model with appropriate precision
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Load depth estimator pipeline
+        depth_estimator = pipeline(
+            "depth-estimation",
+            model=model_name,
+            device=device if device == "cuda" else -1,
+            cache_dir=CACHE_DIR
+        )
         # Optimize memory usage
+        if device == "cuda":
             torch.cuda.empty_cache()
         model_loaded = True
         print(f"Model loaded successfully on {device}")
+        return depth_estimator
     except Exception as e:
         print(f"Error loading model: {str(e)}")
     finally:
         model_loading = False
+# Enhanced depth processing function to improve detail quality
 def enhance_depth_map(depth_map, detail_level='medium'):
     """Apply sophisticated processing to enhance depth map details"""
     # Convert to numpy array if needed
     # Apply different enhancement methods based on detail level
     if detail_level == 'high':
+        # Apply unsharp masking for edge enhancement - simulating Hunyuan's detail technique
+        # First apply gaussian blur
         blurred = gaussian_filter(enhanced_depth, sigma=1.5)
+        # Create the unsharp mask
         mask = enhanced_depth - blurred
+        # Apply the mask with strength factor
         enhanced_depth = enhanced_depth + 1.5 * mask
         # Apply bilateral filter to preserve edges while smoothing noise
+        # Simulate using gaussian combinations
         smooth1 = gaussian_filter(enhanced_depth, sigma=0.5)
         smooth2 = gaussian_filter(enhanced_depth, sigma=2.0)
         edge_mask = enhanced_depth - smooth2
     elif detail_level == 'medium':
         # Less aggressive but still effective enhancement
+        # Apply mild unsharp masking
         blurred = gaussian_filter(enhanced_depth, sigma=1.0)
         mask = enhanced_depth - blurred
         enhanced_depth = enhanced_depth + 0.8 * mask
     return enhanced_depth
+# Convert depth map to 3D mesh with significantly enhanced detail
+def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
+    """Convert depth map to 3D mesh with highly improved detail preservation"""
+    # First, enhance the depth map for better details
     enhanced_depth = enhance_depth_map(depth_map, detail_level)
+    # Get dimensions of depth map
     h, w = enhanced_depth.shape
+    # Create a higher resolution grid for better detail
+    x = np.linspace(0, w-1, resolution)
+    y = np.linspace(0, h-1, resolution)
     x_grid, y_grid = np.meshgrid(x, y)
+    # Use bicubic interpolation for smoother surface with better details
+    # Create interpolation function
+    interp_func = interpolate.RectBivariateSpline(
+        np.arange(h), np.arange(w), enhanced_depth, kx=3, ky=3
+    )
+    # Sample depth at grid points with the interpolation function
+    z_values = interp_func(y, x, grid=True)
+    # Apply a post-processing step to enhance small details even further
+    if detail_level == 'high':
+        # Calculate local gradients to detect edges
+        dx = np.gradient(z_values, axis=1)
+        dy = np.gradient(z_values, axis=0)
+        # Enhance edges by increasing depth differences at high gradient areas
+        gradient_magnitude = np.sqrt(dx**2 + dy**2)
+        edge_mask = np.clip(gradient_magnitude * 5, 0, 0.2)  # Scale and limit effect
+        # Apply edge enhancement
+        z_values = z_values + edge_mask * (z_values - gaussian_filter(z_values, sigma=1.0))
+    # Normalize z-values with advanced scaling for better depth impression
+    z_min, z_max = np.percentile(z_values, [2, 98])  # Remove outliers
+    z_values = (z_values - z_min) / (z_max - z_min) if z_max > z_min else z_values
+    # Apply depth scaling appropriate to the detail level
     if detail_level == 'high':
+        z_scaling = 2.5  # More pronounced depth variations
+    elif detail_level == 'medium':
+        z_scaling = 2.0  # Standard depth
+    else:
+        z_scaling = 1.5  # More subtle depth variations
+    z_values = z_values * z_scaling
+    # Normalize x and y coordinates
+    x_grid = (x_grid / w - 0.5) * 2.0  # Map to -1 to 1
+    y_grid = (y_grid / h - 0.5) * 2.0  # Map to -1 to 1
+    # Create vertices
+    vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
+    # Create faces (triangles) with optimized winding for better normals
+    faces = []
     for i in range(resolution-1):
         for j in range(resolution-1):
             p1 = i * resolution + j
             p3 = (i + 1) * resolution + j
             p4 = (i + 1) * resolution + (j + 1)
+            # Calculate normals to ensure consistent orientation
+            v1 = vertices[p1]
+            v2 = vertices[p2]
+            v3 = vertices[p3]
+            v4 = vertices[p4]
+            # Calculate normals for both possible triangulations
+            # and choose the one that's more consistent
+            norm1 = np.cross(v2-v1, v4-v1)
+            norm2 = np.cross(v4-v3, v1-v3)
+            if np.dot(norm1, norm2) >= 0:
+                # Standard triangulation
+                faces.append([p1, p2, p4])
+                faces.append([p1, p4, p3])
+            else:
+                # Alternative triangulation for smoother surface
+                faces.append([p1, p2, p3])
+                faces.append([p2, p4, p3])
     faces = np.array(faces)
     # Create mesh
+    mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+    # Apply advanced texturing if image is provided
+    if image:
+        # Convert to numpy array if needed
+        if isinstance(image, Image.Image):
+            img_array = np.array(image)
+        else:
+            img_array = image
+        # Create vertex colors with improved sampling
+        if resolution <= img_array.shape[0] and resolution <= img_array.shape[1]:
+            # Create vertex colors by sampling the image with bilinear interpolation
+            vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)
+            # Get normalized coordinates for sampling
+            for i in range(resolution):
+                for j in range(resolution):
+                    # Calculate exact image coordinates with proper scaling
+                    img_x = j * (img_array.shape[1] - 1) / (resolution - 1)
+                    img_y = i * (img_array.shape[0] - 1) / (resolution - 1)
+                    # Bilinear interpolation for smooth color transitions
+                    x0, y0 = int(img_x), int(img_y)
+                    x1, y1 = min(x0 + 1, img_array.shape[1] - 1), min(y0 + 1, img_array.shape[0] - 1)
+                    # Calculate interpolation weights
+                    wx = img_x - x0
+                    wy = img_y - y0
+                    vertex_idx = i * resolution + j
+                    if len(img_array.shape) == 3 and img_array.shape[2] == 3:  # RGB
+                        # Perform bilinear interpolation for each color channel
+                        r = int((1-wx)*(1-wy)*img_array[y0, x0, 0] + wx*(1-wy)*img_array[y0, x1, 0] +
+                                (1-wx)*wy*img_array[y1, x0, 0] + wx*wy*img_array[y1, x1, 0])
+                        g = int((1-wx)*(1-wy)*img_array[y0, x0, 1] + wx*(1-wy)*img_array[y0, x1, 1] +
+                                (1-wx)*wy*img_array[y1, x0, 1] + wx*wy*img_array[y1, x1, 1])
+                        b = int((1-wx)*(1-wy)*img_array[y0, x0, 2] + wx*(1-wy)*img_array[y0, x1, 2] +
+                                (1-wx)*wy*img_array[y1, x0, 2] + wx*wy*img_array[y1, x1, 2])
+                        vertex_colors[vertex_idx, :3] = [r, g, b]
+                        vertex_colors[vertex_idx, 3] = 255  # Alpha
+                    elif len(img_array.shape) == 3 and img_array.shape[2] == 4:  # RGBA
+                        for c in range(4):  # For each RGBA channel
+                            vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
+                                                            wx*(1-wy)*img_array[y0, x1, c] +
+                                                            (1-wx)*wy*img_array[y1, x0, c] +
+                                                            wx*wy*img_array[y1, x1, c])
+                    else:
+                        # Handle grayscale with bilinear interpolation
+                        gray = int((1-wx)*(1-wy)*img_array[y0, x0] + wx*(1-wy)*img_array[y0, x1] +
+                                  (1-wx)*wy*img_array[y1, x0] + wx*wy*img_array[y1, x1])
+                        vertex_colors[vertex_idx, :3] = [gray, gray, gray]
+                        vertex_colors[vertex_idx, 3] = 255
+            mesh.visual.vertex_colors = vertex_colors
+    # Apply smoothing to get rid of staircase artifacts
     if detail_level != 'high':
+        # For medium and low detail, apply Laplacian smoothing
+        # but preserve the overall shape
         mesh = mesh.smoothed(method='laplacian', iterations=1)
+    # Calculate and fix normals for better rendering
+    mesh.fix_normals()
     return mesh
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
+        "model": "Enhanced Depth-Based 3D Model Generator (DPT-Large)",
         "device": "cuda" if torch.cuda.is_available() else "cpu"
     }), 200
         mesh_resolution = min(int(request.form.get('mesh_resolution', 100)), 200)  # Limit max resolution
         output_format = request.form.get('output_format', 'obj').lower()
         detail_level = request.form.get('detail_level', 'medium').lower()  # Parameter for detail level
+        texture_quality = request.form.get('texture_quality', 'medium').lower()  # New parameter for texture quality
     except ValueError:
         return jsonify({"error": "Invalid parameter values"}), 400
             # Process image with thread-safe timeout
             try:
+                def estimate_depth():
                     # Get depth map
+                    result = model(image)
+                    depth_map = result["depth"]
+                    # Convert to numpy array if needed
+                    if isinstance(depth_map, torch.Tensor):
+                        depth_map = depth_map.cpu().numpy()
+                    elif hasattr(depth_map, 'numpy'):
+                        depth_map = depth_map.numpy()
+                    elif isinstance(depth_map, Image.Image):
+                        depth_map = np.array(depth_map)
+                    return depth_map
+                depth_map, error = process_with_timeout(estimate_depth, [], TIMEOUT_SECONDS)
                 if error:
                     if isinstance(error, TimeoutError):
                 processing_jobs[job_id]['progress'] = 60
+                # Create mesh from depth map with enhanced detail handling
                 mesh_resolution_int = int(mesh_resolution)
+                mesh = depth_to_mesh(depth_map, image, resolution=mesh_resolution_int, detail_level=detail_level)
                 processing_jobs[job_id]['progress'] = 80
             except Exception as e:
             return send_file(glb_path, as_attachment=True, download_name="model.glb")
     return jsonify({"error": "File not found"}), 404
 @app.route('/preview/<job_id>', methods=['GET'])
 def preview_model(job_id):