Spaces:

mac9087
/

rightnight

Running

App Files Files Community

mac9087 committed on Apr 26

Commit

8c9f945

verified ·

1 Parent(s): d33be4e

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -196

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import torch
 import time
@@ -11,11 +12,11 @@ import io
 import zipfile
 import uuid
 import traceback
-from huggingface_hub import snapshot_download, login, HfFileSystem
 from flask_cors import CORS
 import numpy as np
 import trimesh
-from transformers import pipeline
 from scipy.ndimage import gaussian_filter
 from scipy import interpolate
 import cv2
@@ -34,8 +35,6 @@ os.makedirs(RESULTS_FOLDER, exist_ok=True)
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.environ['HF_HOME'] = CACHE_DIR
-os.environ['TRANSFORMERS_CACHE'] = os.path.join(CACHE_DIR, 'transformers')
-os.environ['HF_DATASETS_CACHE'] = os.path.join(CACHE_DIR, 'datasets')
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
@@ -44,6 +43,8 @@ processing_jobs = {}
 # Model variables
 dpt_estimator = None
 model_loaded = False
 model_loading = False
@@ -84,119 +85,72 @@ def process_with_timeout(function, args, timeout):
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
-def remove_background(image_path):
-    try:
-        # Load image
-        img = cv2.imread(image_path)
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        # Initialize mask and models for GrabCut
-        mask = np.zeros(img.shape[:2], np.uint8)
-        bgd_model = np.zeros((1, 65), np.float64)
-        fgd_model = np.zeros((1, 65), np.float64)
-        # Define initial rectangle (10% border margin)
-        h, w = img.shape[:2]
-        margin = int(min(w, h) * 0.1)
-        rect = (margin, margin, w - 2 * margin, h - 2 * margin)
-        # Run GrabCut
-        cv2.grabCut(img, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
-        # Create final mask (0 for background, 1 for foreground)
-        mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
-        # Check if foreground exists
-        if np.sum(mask2) == 0:
-            print(f"Warning: No foreground detected in {image_path}")
-            return None
-        # Apply mask and set background to black
-        img = img * mask2[:, :, np.newaxis]
-        img_pil = Image.fromarray(img).convert("RGB")
-        return img_pil
-    except Exception as e:
-        print(f"Error in remove_background for {image_path}: {str(e)}")
-        raise
-def preprocess_image(image_path):
-    img = remove_background(image_path)
-    if img is None:
-        raise ValueError("No foreground detected in image")
-    if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
-        if img.width > img.height:
-            new_width = MAX_DIMENSION
-            new_height = int(img.height * (MAX_DIMENSION / img.width))
-        else:
-            new_height = MAX_DIMENSION
-            new_width = int(img.width * (MAX_DIMENSION / img.height))
-        img = img.resize((new_width, new_height), Image.LANCZOS)
-    img_array = np.array(img)
-    if len(img_array.shape) == 3 and img_array.shape[2] == 3:
-        lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
-        l, a, b = cv2.split(lab)
-        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-        cl = clahe.apply(l)
-        enhanced_lab = cv2.merge((cl, a, b))
-        img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
-        img = Image.fromarray(img_array)
-    return img
 def load_models():
-    global dpt_estimator, model_loaded, model_loading
     if model_loaded:
-        return dpt_estimator
     if model_loading:
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return dpt_estimator
     try:
         model_loading = True
         print("Loading models...")
         hf_token = os.environ.get('HF_TOKEN')
         if hf_token:
-            print("HF_TOKEN found, attempting login...")
             login(token=hf_token)
             print("Authenticated with Hugging Face token")
-        else:
-            print("Error: HF_TOKEN not found in environment. Intel/dpt-large requires authentication.")
-            raise ValueError("HF_TOKEN is required for Intel/dpt-large")
         dpt_model_name = "Intel/dpt-large"
-        fs = HfFileSystem(token=hf_token)
-        model_cached = os.path.exists(os.path.join(CACHE_DIR, "hub", "models--Intel--dpt-large"))
-        if not model_cached:
-            max_retries = 3
-            retry_delay = 5
-            for attempt in range(max_retries):
-                try:
-                    print(f"Attempting to download {dpt_model_name}, attempt {attempt+1}")
-                    snapshot_download(
-                        repo_id=dpt_model_name,
-                        cache_dir=CACHE_DIR,
-                        resume_download=True,
-                        token=hf_token
-                    )
-                    print(f"Successfully downloaded {dpt_model_name}")
-                    break
-                except Exception as e:
-                    if attempt < max_retries - 1:
-                        print(f"DPT download attempt {attempt+1} failed: {str(e)}. Retrying after {retry_delay}s...")
-                        time.sleep(retry_delay)
-                        retry_delay *= 2
-                    else:
-                        raise
-        else:
-            print(f"{dpt_model_name} already cached in {CACHE_DIR}")
         dpt_estimator = pipeline(
             "depth-estimation",
@@ -208,8 +162,43 @@ def load_models():
         print("DPT-Large loaded")
         gc.collect()
         model_loaded = True
-        return dpt_estimator
     except Exception as e:
         print(f"Error loading models: {str(e)}")
@@ -218,6 +207,38 @@ def load_models():
     finally:
         model_loading = False
 def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = depth_map.copy().astype(np.float32)
     p_low, p_high = np.percentile(enhanced_depth, [1, 99])
@@ -243,7 +264,7 @@ def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = np.clip(enhanced_depth, 0, 1)
     return enhanced_depth
-def depth_to_mesh(depth_map, image, resolution=80, detail_level='medium', view_angle=0):
     enhanced_depth = enhance_depth_map(depth_map, detail_level)
     h, w = enhanced_depth.shape
     x = np.linspace(0, w-1, resolution)
@@ -271,10 +292,6 @@ def depth_to_mesh(depth_map, image, resolution=80, detail_level='medium', view_a
     y_grid = (y_grid / h - 0.5) * 2.0
     vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
-    if view_angle != 0:
-        rotation_matrix = trimesh.transformations.rotation_matrix(view_angle, [0, 1, 0])
-        vertices = trimesh.transform_points(vertices, rotation_matrix)
     faces = []
     for i in range(resolution-1):
         for j in range(resolution-1):
@@ -319,48 +336,29 @@ def depth_to_mesh(depth_map, image, resolution=80, detail_level='medium', view_a
                             (1-wx)*wy*img_array[y1, x0, 2] + wx*wy*img_array[y1, x1, 2])
                     vertex_colors[vertex_idx, :3] = [r, g, b]
                     vertex_colors[vertex_idx, 3] = 255
                 else:
                     gray = int((1-wx)*(1-wy)*img_array[y0, x0] + wx*(1-wy)*img_array[y0, x1] +
                               (1-wx)*wy*img_array[y1, x0] + wx*wy*img_array[y1, x1])
                     vertex_colors[vertex_idx, :3] = [gray, gray, gray]
                     vertex_colors[vertex_idx, 3] = 255
-        mesh.visual.vertex_colors = vertex_colors
     if detail_level != 'high':
         mesh = mesh.smoothed(method='laplacian', iterations=1)
     mesh.fix_normals()
     return mesh
-def combine_meshes(meshes):
-    if len(meshes) == 1:
-        return meshes[0]
-    combined_vertices = []
-    combined_faces = []
-    vertex_offset = 0
-    for mesh in meshes:
-        combined_vertices.append(mesh.vertices)
-        combined_faces.append(mesh.faces + vertex_offset)
-        vertex_offset += len(mesh.vertices)
-    combined_vertices = np.vstack(combined_vertices)
-    combined_faces = np.vstack(combined_faces)
-    combined_mesh = trimesh.Trimesh(vertices=combined_vertices, faces=combined_faces)
-    combined_mesh = combined_mesh.subdivide_to_size(max_edge=0.05)
-    combined_mesh = combined_mesh.smoothed(method='laplacian', iterations=2)
-    combined_mesh.fill_holes()
-    combined_mesh.fix_normals()
-    return combined_mesh
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "DPT-Large (Multi-View)",
         "device": "cpu"
     }), 200
@@ -398,23 +396,18 @@ def progress(job_id):
 @app.route('/convert', methods=['POST'])
 def convert_image_to_3d():
-    required_views = ['front', 'back']
-    optional_views = ['left', 'right']
-    view_files = {}
-    for view in required_views + optional_views:
-        if view in request.files and request.files[view].filename != '':
-            view_files[view] = request.files[view]
-    if not all(view in view_files for view in required_views):
-        return jsonify({"error": "Front and back images are required"}), 400
-    for view, file in view_files.items():
-        if not allowed_file(file.filename):
-            return jsonify({"error": f"File type not allowed for {view}. Supported types: {', '.join(ALLOWED_EXTENSIONS)}"}), 400
     try:
-        mesh_resolution = min(int(request.form.get('mesh_resolution', 80)), 120)
         output_format = request.form.get('output_format', 'glb').lower()
         detail_level = request.form.get('detail_level', 'medium').lower()
         texture_quality = request.form.get('texture_quality', 'medium').lower()
@@ -425,7 +418,7 @@ def convert_image_to_3d():
         return jsonify({"error": "Unsupported output format. Use 'obj' or 'glb'"}), 400
     if detail_level == 'high':
-        mesh_resolution = min(int(mesh_resolution * 1.5), 120)
     elif detail_level == 'low':
         mesh_resolution = max(int(mesh_resolution * 0.7), 50)
@@ -433,12 +426,9 @@ def convert_image_to_3d():
     output_dir = os.path.join(RESULTS_FOLDER, job_id)
     os.makedirs(output_dir, exist_ok=True)
-    filepaths = {}
-    for view, file in view_files.items():
-        filename = secure_filename(file.filename)
-        filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{job_id}_{view}_{filename}")
-        file.save(filepath)
-        filepaths[view] = filepath
     processing_jobs[job_id] = {
         'status': 'processing',
@@ -450,53 +440,53 @@ def convert_image_to_3d():
         'created_at': time.time()
     }
-    def process_images():
         thread = threading.current_thread()
         processing_jobs[job_id]['thread_alive'] = lambda: thread.is_alive()
         try:
             processing_jobs[job_id]['progress'] = 5
-            images = {}
-            for view, filepath in filepaths.items():
-                try:
-                    images[view] = preprocess_image(filepath)
-                except ValueError as e:
-                    processing_jobs[job_id]['status'] = 'error'
-                    processing_jobs[job_id]['error'] = f"Error preprocessing {view} image: {str(e)}"
-                    return
             processing_jobs[job_id]['progress'] = 10
             try:
-                dpt_model = load_models()
-                processing_jobs[job_id]['progress'] = 20
             except Exception as e:
                 processing_jobs[job_id]['status'] = 'error'
                 processing_jobs[job_id]['error'] = f"Error loading models: {str(e)}"
                 return
             try:
-                def estimate_depths():
-                    meshes = []
-                    view_angles = {'front': 0, 'back': np.pi, 'left': np.pi/2, 'right': -np.pi/2}
                     with torch.no_grad():
-                        for view, image in images.items():
-                            dpt_result = dpt_model(image)
-                            dpt_depth = dpt_result["depth"]
-                            depth_map = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
-                            if len(depth_map.shape) > 2:
-                                depth_map = np.mean(depth_map, axis=2)
-                            p_low, p_high = np.percentile(depth_map, [1, 99])
-                            depth_map = np.clip((depth_map - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else depth_map
-                            mesh = depth_to_mesh(depth_map, image, resolution=mesh_resolution, detail_level=detail_level, view_angle=view_angles[view])
-                            meshes.append(mesh)
-                            gc.collect()
-                    combined_mesh = combine_meshes(meshes)
-                    return combined_mesh
-                combined_mesh, error = process_with_timeout(estimate_depths, [], TIMEOUT_SECONDS)
                 if error:
                     if isinstance(error, TimeoutError):
@@ -506,11 +496,14 @@ def convert_image_to_3d():
                     else:
                         raise error
                 processing_jobs[job_id]['progress'] = 80
                 if output_format == 'obj':
                     obj_path = os.path.join(output_dir, "model.obj")
-                    combined_mesh.export(
                         obj_path,
                         file_type='obj',
                         include_normals=True,
@@ -531,7 +524,7 @@ def convert_image_to_3d():
                 elif output_format == 'glb':
                     glb_path = os.path.join(output_dir, "model.glb")
-                    combined_mesh.export(
                         glb_path,
                         file_type='glb'
                     )
@@ -550,9 +543,8 @@ def convert_image_to_3d():
                 print(error_details)
                 return
-            for filepath in filepaths.values():
-                if os.path.exists(filepath):
-                    os.remove(filepath)
             gc.collect()
         except Exception as e:
@@ -561,11 +553,10 @@ def convert_image_to_3d():
             processing_jobs[job_id]['error'] = f"{str(e)}\n{error_details}"
             print(f"Error processing job {job_id}: {str(e)}")
             print(error_details)
-            for filepath in filepaths.values():
-                if os.path.exists(filepath):
-                    os.remove(filepath)
-    processing_thread = threading.Thread(target=process_images)
     processing_thread.daemon = True
     processing_thread.start()
@@ -675,7 +666,7 @@ def model_info(job_id):
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
-        "message": "Multi-View Image to 3D API (DPT-Large)",
         "endpoints": [
             "/convert",
             "/progress/<job_id>",
@@ -684,19 +675,16 @@ def index():
             "/model-info/<job_id>"
         ],
         "parameters": {
-            "front": "Image file (required)",
-            "back": "Image file (required)",
-            "left": "Image file (optional)",
-            "right": "Image file (optional)",
-            "mesh_resolution": "Integer (50-120)",
             "output_format": "obj or glb",
             "detail_level": "low, medium, or high",
             "texture_quality": "low, medium, or high"
         },
-        "description": "Creates 3D models from multiple 2D images using Intel DPT-Large with custom background removal."
     }), 200
 if __name__ == '__main__':
     cleanup_old_jobs()
     port = int(os.environ.get('PORT', 7860))
-    app.run(host='0.0.0.0', port=port)

+```python
 import os
 import torch
 import time
 import zipfile
 import uuid
 import traceback
+from huggingface_hub import snapshot_download, login
 from flask_cors import CORS
 import numpy as np
 import trimesh
+from transformers import pipeline, AutoImageProcessor, AutoModelForDepthEstimation
 from scipy.ndimage import gaussian_filter
 from scipy import interpolate
 import cv2
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.environ['HF_HOME'] = CACHE_DIR
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
 # Model variables
 dpt_estimator = None
+depth_anything_model = None
+depth_anything_processor = None
 model_loaded = False
 model_loading = False
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+def preprocess_image(image_path):
+    with Image.open(image_path) as img:
+        img = img.convert("RGB")
+        if img.width > MAX_DIMENSION or img.height > MAX_DIMENSION:
+            if img.width > img.height:
+                new_width = MAX_DIMENSION
+                new_height = int(img.height * (MAX_DIMENSION / img.width))
+            else:
+                new_height = MAX_DIMENSION
+                new_width = int(img.width * (MAX_DIMENSION / img.height))
+            img = img.resize((new_width, new_height), Image.LANCZOS)
+        img_array = np.array(img)
+        if len(img_array.shape) == 3 and img_array.shape[2] == 3:
+            lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
+            l, a, b = cv2.split(lab)
+            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+            cl = clahe.apply(l)
+            enhanced_lab = cv2.merge((cl, a, b))
+            img_array = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
+            img = Image.fromarray(img_array)
+        return img
 def load_models():
+    global dpt_estimator, depth_anything_model, depth_anything_processor, model_loaded, model_loading
     if model_loaded:
+        return dpt_estimator, depth_anything_model, depth_anything_processor
     if model_loading:
         while model_loading and not model_loaded:
             time.sleep(0.5)
+        return dpt_estimator, depth_anything_model, depth_anything_processor
     try:
         model_loading = True
         print("Loading models...")
+        # Authenticate with Hugging Face
         hf_token = os.environ.get('HF_TOKEN')
         if hf_token:
             login(token=hf_token)
             print("Authenticated with Hugging Face token")
+        # DPT-Large
         dpt_model_name = "Intel/dpt-large"
+        max_retries = 3
+        retry_delay = 5
+        for attempt in range(max_retries):
+            try:
+                snapshot_download(
+                    repo_id=dpt_model_name,
+                    cache_dir=CACHE_DIR,
+                    resume_download=True,
+                    token=hf_token
+                )
+                break
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    print(f"DPT download attempt {attempt+1} failed: {str(e)}. Retrying...")
+                    time.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    raise
         dpt_estimator = pipeline(
             "depth-estimation",
         print("DPT-Large loaded")
         gc.collect()
+        # Depth Anything
+        da_model_name = "depth-anything/Depth-Anything-V2-Small-hf"
+        for attempt in range(max_retries):
+            try:
+                snapshot_download(
+                    repo_id=da_model_name,
+                    cache_dir=CACHE_DIR,
+                    resume_download=True,
+                    token=hf_token
+                )
+                break
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    print(f"Depth Anything download attempt {attempt+1} failed: {str(e)}. Retrying...")
+                    time.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    print(f"Failed to load Depth Anything: {str(e)}. Falling back to DPT-Large only.")
+                    depth_anything_model = None
+                    depth_anything_processor = None
+                    model_loaded = True
+                    return dpt_estimator, None, None
+        depth_anything_processor = AutoImageProcessor.from_pretrained(
+            da_model_name,
+            cache_dir=CACHE_DIR,
+            token=hf_token
+        )
+        depth_anything_model = AutoModelForDepthEstimation.from_pretrained(
+            da_model_name,
+            cache_dir=CACHE_DIR,
+            token=hf_token
+        ).to("cpu")
         model_loaded = True
+        print("Depth Anything loaded")
+        return dpt_estimator, depth_anything_model, depth_anything_processor
     except Exception as e:
         print(f"Error loading models: {str(e)}")
     finally:
         model_loading = False
+def fuse_depth_maps(dpt_depth, da_depth, detail_level='medium'):
+    if isinstance(dpt_depth, Image.Image):
+        dpt_depth = np.array(dpt_depth)
+    if isinstance(da_depth, torch.Tensor):
+        da_depth = da_depth.cpu().numpy()
+    if len(dpt_depth.shape) > 2:
+        dpt_depth = np.mean(dpt_depth, axis=2)
+    if len(da_depth.shape) > 2:
+        da_depth = np.mean(da_depth, axis=2)
+    if dpt_depth.shape != da_depth.shape:
+        da_depth = cv2.resize(da_depth, (dpt_depth.shape[1], dpt_depth.shape[0]), interpolation=cv2.INTER_CUBIC)
+    p_low_dpt, p_high_dpt = np.percentile(dpt_depth, [1, 99])
+    p_low_da, p_high_da = np.percentile(da_depth, [1, 99])
+    dpt_depth = np.clip((dpt_depth - p_low_dpt) / (p_high_dpt - p_low_dpt), 0, 1) if p_high_dpt > p_low_dpt else dpt_depth
+    da_depth = np.clip((da_depth - p_low_da) / (p_high_da - p_low_da), 0, 1) if p_high_da > p_low_da else da_depth
+    if detail_level == 'high':
+        weight_da = 0.7
+        edges = cv2.Canny((da_depth * 255).astype(np.uint8), 50, 150)
+        edge_mask = (edges > 0).astype(np.float32)
+        dpt_weight = gaussian_filter(1 - edge_mask, sigma=1.0)
+        da_weight = gaussian_filter(edge_mask, sigma=1.0)
+        fused_depth = dpt_weight * dpt_depth + da_weight * da_depth * weight_da + (1 - weight_da) * dpt_depth
+    else:
+        weight_da = 0.5 if detail_level == 'medium' else 0.3
+        fused_depth = (1 - weight_da) * dpt_depth + weight_da * da_depth
+    fused_depth = np.clip(fused_depth, 0, 1)
+    return fused_depth
 def enhance_depth_map(depth_map, detail_level='medium'):
     enhanced_depth = depth_map.copy().astype(np.float32)
     p_low, p_high = np.percentile(enhanced_depth, [1, 99])
     enhanced_depth = np.clip(enhanced_depth, 0, 1)
     return enhanced_depth
+def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
     enhanced_depth = enhance_depth_map(depth_map, detail_level)
     h, w = enhanced_depth.shape
     x = np.linspace(0, w-1, resolution)
     y_grid = (y_grid / h - 0.5) * 2.0
     vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T
     faces = []
     for i in range(resolution-1):
         for j in range(resolution-1):
                             (1-wx)*wy*img_array[y1, x0, 2] + wx*wy*img_array[y1, x1, 2])
                     vertex_colors[vertex_idx, :3] = [r, g, b]
                     vertex_colors[vertex_idx, 3] = 255
+                elif len(img_array.shape) == 3 and img_array.shape[2] == 4:
+                    for c in range(4):
+                        vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
+                                                        wx*(1-wy)*img_array[y0, x1, c] +
+                                                        (1-wx)*wy*img_array[y1, x0, c] +
+                                                        wx*wy*img_array[y1, x1, c])
                 else:
                     gray = int((1-wx)*(1-wy)*img_array[y0, x0] + wx*(1-wy)*img_array[y0, x1] +
                               (1-wx)*wy*img_array[y1, x0] + wx*wy*img_array[y1, x1])
                     vertex_colors[vertex_idx, :3] = [gray, gray, gray]
                     vertex_colors[vertex_idx, 3] = 255
+            mesh.visual.vertex_colors = vertex_colors
     if detail_level != 'high':
         mesh = mesh.smoothed(method='laplacian', iterations=1)
     mesh.fix_normals()
     return mesh
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
+        "model": "DPT-Large + Depth Anything",
         "device": "cpu"
     }), 200
 @app.route('/convert', methods=['POST'])
 def convert_image_to_3d():
+    if 'image' not in request.files:
+        return jsonify({"error": "No image provided"}), 400
+    file = request.files['image']
+    if file.filename == '':
+        return jsonify({"error": "No image selected"}), 400
+    if not allowed_file(file.filename):
+        return jsonify({"error": f"File type not allowed. Supported types: {', '.join(ALLOWED_EXTENSIONS)}"}), 400
     try:
+        mesh_resolution = min(int(request.form.get('mesh_resolution', 100)), 150)
         output_format = request.form.get('output_format', 'glb').lower()
         detail_level = request.form.get('detail_level', 'medium').lower()
         texture_quality = request.form.get('texture_quality', 'medium').lower()
         return jsonify({"error": "Unsupported output format. Use 'obj' or 'glb'"}), 400
     if detail_level == 'high':
+        mesh_resolution = min(int(mesh_resolution * 1.5), 150)
     elif detail_level == 'low':
         mesh_resolution = max(int(mesh_resolution * 0.7), 50)
     output_dir = os.path.join(RESULTS_FOLDER, job_id)
     os.makedirs(output_dir, exist_ok=True)
+    filename = secure_filename(file.filename)
+    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{job_id}_{filename}")
+    file.save(filepath)
     processing_jobs[job_id] = {
         'status': 'processing',
         'created_at': time.time()
     }
+    def process_image():
         thread = threading.current_thread()
         processing_jobs[job_id]['thread_alive'] = lambda: thread.is_alive()
         try:
             processing_jobs[job_id]['progress'] = 5
+            image = preprocess_image(filepath)
             processing_jobs[job_id]['progress'] = 10
             try:
+                dpt_model, da_model, da_processor = load_models()
+                processing_jobs[job_id]['progress'] = 30
             except Exception as e:
                 processing_jobs[job_id]['status'] = 'error'
                 processing_jobs[job_id]['error'] = f"Error loading models: {str(e)}"
                 return
             try:
+                def estimate_depth():
                     with torch.no_grad():
+                        # DPT-Large
+                        dpt_result = dpt_model(image)
+                        dpt_depth = dpt_result["depth"]
+                        # Depth Anything (if loaded)
+                        if da_model and da_processor:
+                            inputs = da_processor(images=image, return_tensors="pt")
+                            inputs = {k: v.to("cpu") for k, v in inputs.items()}
+                            outputs = da_model(**inputs)
+                            da_depth = outputs.predicted_depth.squeeze()
+                            da_depth = torch.nn.functional.interpolate(
+                                da_depth.unsqueeze(0).unsqueeze(0),
+                                size=(image.height, image.width),
+                                mode='bicubic',
+                                align_corners=False
+                            ).squeeze()
+                            fused_depth = fuse_depth_maps(dpt_depth, da_depth, detail_level)
+                        else:
+                            fused_depth = np.array(dpt_depth) if isinstance(dpt_depth, Image.Image) else dpt_depth
+                            if len(fused_depth.shape) > 2:
+                                fused_depth = np.mean(fused_depth, axis=2)
+                            p_low, p_high = np.percentile(fused_depth, [1, 99])
+                            fused_depth = np.clip((fused_depth - p_low) / (p_high - p_low), 0, 1) if p_high > p_low else fused_depth
+                        return fused_depth
+                fused_depth, error = process_with_timeout(estimate_depth, [], TIMEOUT_SECONDS)
                 if error:
                     if isinstance(error, TimeoutError):
                     else:
                         raise error
+                processing_jobs[job_id]['progress'] = 60
+                mesh_resolution_int = int(mesh_resolution)
+                mesh = depth_to_mesh(fused_depth, image, resolution=mesh_resolution_int, detail_level=detail_level)
                 processing_jobs[job_id]['progress'] = 80
                 if output_format == 'obj':
                     obj_path = os.path.join(output_dir, "model.obj")
+                    mesh.export(
                         obj_path,
                         file_type='obj',
                         include_normals=True,
                 elif output_format == 'glb':
                     glb_path = os.path.join(output_dir, "model.glb")
+                    mesh.export(
                         glb_path,
                         file_type='glb'
                     )
                 print(error_details)
                 return
+            if os.path.exists(filepath):
+                os.remove(filepath)
             gc.collect()
         except Exception as e:
             processing_jobs[job_id]['error'] = f"{str(e)}\n{error_details}"
             print(f"Error processing job {job_id}: {str(e)}")
             print(error_details)
+            if os.path.exists(filepath):
+                os.remove(filepath)
+    processing_thread = threading.Thread(target=process_image)
     processing_thread.daemon = True
     processing_thread.start()
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
+        "message": "Image to 3D API (DPT-Large + Depth Anything)",
         "endpoints": [
             "/convert",
             "/progress/<job_id>",
             "/model-info/<job_id>"
         ],
         "parameters": {
+            "mesh_resolution": "Integer (50-150)",
             "output_format": "obj or glb",
             "detail_level": "low, medium, or high",
             "texture_quality": "low, medium, or high"
         },
+        "description": "Creates high-quality 3D models from 2D images using DPT-Large and Depth Anything."
     }), 200
 if __name__ == '__main__':
     cleanup_old_jobs()
     port = int(os.environ.get('PORT', 7860))
+    app.run(host='0.0.0.0', port=port)
+```