lidavidsh committed on
Commit
246c42f
ยท
1 Parent(s): cfa6140

init push

Browse files
Files changed (4) hide show
  1. api_server.py +464 -0
  2. app.py +372 -46
  3. pyproject.toml +3 -3
  4. requirements.txt +1 -22
api_server.py ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
# Backend API server for Depth Anything 3 remote inference

import os
import sys
import asyncio
import base64
import io
import json
import uuid
from typing import Dict, Any, Optional
from datetime import datetime
import glob
import shutil
import zipfile

import numpy as np
import torch
from fastapi import FastAPI, WebSocket, HTTPException, Query
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn

# Make the vendored repo importable before pulling in its modules.
sys.path.append("depth-anything-3/")

from depth_anything_3.api import DepthAnything3  # noqa: E402
from depth_anything_3.utils.export.glb import export_to_glb  # noqa: E402
from depth_anything_3.utils.export.gs import export_to_gs_video  # noqa: E402

# FastAPI application; CORS is fully open because the UI may be served
# from a different origin than this backend.
app = FastAPI(title="Depth Anything 3 Inference API", version="1.0.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Singletons populated by load_model() at startup.
model: Optional[DepthAnything3] = None
device: Optional[str] = None

# In-memory job registry: job_id -> {"status": "queued/processing/completed/failed",
# "result": {...}, "error": ..., "created_at": ...}
jobs: Dict[str, Dict[str, Any]] = {}

# Live WebSocket connections keyed by client_id.
websocket_connections: Dict[str, WebSocket] = {}
55
+ # -------------------------------------------------------------------------
56
+ # Request/Response Models
57
+ # -------------------------------------------------------------------------
58
class ImageData(BaseModel):
    """One uploaded image: its original filename plus base64-encoded bytes."""

    filename: str
    data: str  # base64-encoded image bytes
61
+
62
+
63
class Options(BaseModel):
    """Inference and export tuning options sent by the frontend."""

    process_res_method: Optional[str] = "upper_bound_resize"
    selected_first_frame: Optional[str] = ""  # filename of the reference frame
    infer_gs: Optional[bool] = False  # also run 3DGS inference
    # Export tuning (backend defaults used when omitted)
    conf_thresh_percentile: Optional[float] = 40.0
    num_max_points: Optional[int] = 1_000_000
    show_cameras: Optional[bool] = True
    gs_trj_mode: Optional[str] = "extend"  # "extend" | "smooth"
    gs_video_quality: Optional[str] = "low"  # "low" | "high"
73
+
74
+
75
class InferenceRequest(BaseModel):
    """Payload for POST /inference: images plus client routing information."""

    images: list[ImageData]
    client_id: str  # WebSocket client that should receive progress updates
    options: Optional[Options] = None
79
+
80
+
81
class InferenceResponse(BaseModel):
    """Acknowledgement returned immediately after a job is submitted."""

    job_id: str
    status: str = "queued"
84
+
85
+
86
+ # -------------------------------------------------------------------------
87
+ # Model Loading
88
+ # -------------------------------------------------------------------------
89
def load_model():
    """Load the Depth Anything 3 model onto the GPU at server startup.

    Populates the module-level ``model`` and ``device`` globals.

    Raises:
        RuntimeError: if CUDA is unavailable (GPU is mandatory for DA3).
    """
    global model, device

    print("Initializing and loading Depth Anything 3 model...")
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available. GPU is required for DA3 inference.")

    device = "cuda"
    # Model source: HF Hub repo id or a local checkpoint directory.
    model_dir = os.getenv("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")

    model = DepthAnything3.from_pretrained(model_dir).to(device)  # type: ignore
    model.eval()

    print(f"Model loaded successfully on {device} from {model_dir}")
106
+
107
+
108
+ # -------------------------------------------------------------------------
109
+ # Helpers
110
+ # -------------------------------------------------------------------------
111
+ def _serialize_bytes(b: bytes) -> str:
112
+ """Serialize raw bytes to base64 string"""
113
+ return base64.b64encode(b).decode("utf-8")
114
+
115
+
116
+ def _serialize_file(path: str) -> str:
117
+ """Serialize a file at 'path' to base64 string"""
118
+ with open(path, "rb") as f:
119
+ return _serialize_bytes(f.read())
120
+
121
+
122
+ def _zip_dir_to_bytes(dir_path: str) -> bytes:
123
+ """Zip a directory and return zip bytes"""
124
+ buffer = io.BytesIO()
125
+ with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
126
+ for root, _, files in os.walk(dir_path):
127
+ for fn in files:
128
+ full = os.path.join(root, fn)
129
+ arcname = os.path.relpath(full, start=dir_path)
130
+ zf.write(full, arcname)
131
+ buffer.seek(0)
132
+ return buffer.read()
133
+
134
+
135
+ def _actual_process_method(name: str) -> str:
136
+ """Map frontend option to actual processing method used by DA3"""
137
+ mapping = {
138
+ "high_res": "lower_bound_resize",
139
+ "low_res": "upper_bound_resize",
140
+ "upper_bound_resize": "upper_bound_resize",
141
+ "lower_bound_resize": "lower_bound_resize",
142
+ "upper_bound_crop": "upper_bound_crop",
143
+ }
144
+ return mapping.get(name or "upper_bound_resize", "upper_bound_resize")
145
+
146
+
147
+ def _save_predictions_npz(target_dir: str, prediction: Any):
148
+ """Save predictions data to predictions.npz for caching."""
149
+ try:
150
+ output_file = os.path.join(target_dir, "predictions.npz")
151
+ save_dict: Dict[str, Any] = {}
152
+
153
+ if getattr(prediction, "processed_images", None) is not None:
154
+ save_dict["images"] = prediction.processed_images
155
+ if getattr(prediction, "depth", None) is not None:
156
+ save_dict["depths"] = np.round(prediction.depth, 6)
157
+ if getattr(prediction, "conf", None) is not None:
158
+ save_dict["conf"] = np.round(prediction.conf, 2)
159
+ if getattr(prediction, "extrinsics", None) is not None:
160
+ save_dict["extrinsics"] = prediction.extrinsics
161
+ if getattr(prediction, "intrinsics", None) is not None:
162
+ save_dict["intrinsics"] = prediction.intrinsics
163
+
164
+ np.savez_compressed(output_file, **save_dict)
165
+ print(f"[backend] Saved predictions cache to: {output_file}")
166
+ except Exception as e:
167
+ print(f"[backend] Warning: Failed to save predictions cache: {e}")
168
+
169
+
170
+ # -------------------------------------------------------------------------
171
+ # Core Inference Function
172
+ # -------------------------------------------------------------------------
173
async def run_inference(
    job_id: str,
    target_dir: str,
    client_id: Optional[str] = None,
    options: Optional[Options] = None,
):
    """Run DA3 model inference on uploaded images and export all artifacts.

    Reads images from ``<target_dir>/images``, runs the global ``model``,
    exports scene.glb / depth_vis / predictions.npz (and optionally a 3DGS
    video) into ``target_dir``, packages them as base64 into
    ``jobs[job_id]["result"]``, and streams progress over the submitting
    client's WebSocket.

    Args:
        job_id: Key into the global ``jobs`` registry (entry must exist).
        target_dir: Temp directory holding the uploads; always deleted at the
            end (fix: previously it leaked when the job failed).
        client_id: Optional WebSocket client to notify of progress.
        options: Inference/export tuning from the frontend.
    """

    async def _send_ws(message: Dict[str, Any]) -> None:
        # Push a progress message to the submitting client, if still connected.
        if client_id and client_id in websocket_connections:
            await websocket_connections[client_id].send_json(message)

    try:
        jobs[job_id]["status"] = "processing"
        await _send_ws({"type": "executing", "data": {"job_id": job_id, "node": "start"}})

        if model is None:
            # startup_event() should have loaded the model; fail fast with a
            # clear error instead of an AttributeError later.
            raise RuntimeError("Model is not loaded; server startup may have failed")

        # Collect uploaded images in deterministic (sorted) order.
        image_names = sorted(glob.glob(os.path.join(target_dir, "images", "*")))
        print(f"Found {len(image_names)} images for job {job_id}")
        if len(image_names) == 0:
            raise ValueError("No images found in target directory")

        # Reorder so the user-selected frame becomes the reference (first) frame.
        selected_first = options.selected_first_frame if options else ""
        if selected_first:
            sel_path = next(
                (p for p in image_names if os.path.basename(p) == selected_first), None
            )
            if sel_path:
                image_names = [sel_path] + [p for p in image_names if p != sel_path]
                print(f"Selected first frame: {selected_first} -> {sel_path}")

        await _send_ws({"type": "executing", "data": {"job_id": job_id, "node": "preprocess"}})

        # Run inference; artifacts are exported explicitly below, not inline.
        print(f"Running inference for job {job_id}...")
        actual_method = _actual_process_method(
            options.process_res_method if options else "upper_bound_resize"
        )
        with torch.no_grad():
            prediction = model.inference(
                image=image_names,
                process_res_method=actual_method,
                export_dir=None,  # export manually below
                export_format="mini_npz",
                infer_gs=bool(options.infer_gs) if options else False,
            )

        await _send_ws({"type": "executing", "data": {"job_id": job_id, "node": "postprocess"}})

        # Export GLB (point cloud + cameras); non-fatal on failure.
        try:
            export_to_glb(
                prediction,
                export_dir=target_dir,
                num_max_points=int(options.num_max_points) if options else 1_000_000,
                conf_thresh_percentile=float(options.conf_thresh_percentile) if options else 40.0,
                show_cameras=bool(options.show_cameras) if options else True,
            )
            print(f"[backend] Exported GLB + depth_vis to {target_dir}")
        except Exception as e:
            print(f"[backend] GLB export failed: {e}")

        # Optional 3DGS novel-view video; also non-fatal.
        if options and bool(options.infer_gs):
            try:
                mode_mapping = {"extend": "extend", "smooth": "interpolate_smooth"}
                export_to_gs_video(
                    prediction,
                    export_dir=target_dir,
                    chunk_size=4,
                    trj_mode=mode_mapping.get(options.gs_trj_mode or "extend", "extend"),
                    enable_tqdm=False,
                    vis_depth="hcat",
                    video_quality=options.gs_video_quality or "low",
                )
                print(f"[backend] Exported GS video to {target_dir}")
            except Exception as e:
                print(f"[backend] GS video export failed: {e}")

        # Cache raw predictions for the frontend.
        _save_predictions_npz(target_dir, prediction)

        # Package artifacts as base64 strings for JSON transport.
        artifacts: Dict[str, Any] = {}
        glb_path = os.path.join(target_dir, "scene.glb")
        if os.path.exists(glb_path):
            artifacts["glb"] = _serialize_file(glb_path)

        depth_vis_dir = os.path.join(target_dir, "depth_vis")
        if os.path.isdir(depth_vis_dir):
            try:
                artifacts["depth_vis_zip"] = _serialize_bytes(_zip_dir_to_bytes(depth_vis_dir))
            except Exception as e:
                print(f"[backend] depth_vis zip failed: {e}")

        npz_path = os.path.join(target_dir, "predictions.npz")
        if os.path.exists(npz_path):
            artifacts["predictions_npz"] = _serialize_file(npz_path)

        # Optional GS video: exporter may use a fixed name, so take the first mp4.
        mp4_candidates = glob.glob(os.path.join(target_dir, "*.mp4"))
        if mp4_candidates:
            artifacts["gs_video"] = _serialize_file(mp4_candidates[0])

        jobs[job_id]["status"] = "completed"
        jobs[job_id]["result"] = {"artifacts": artifacts}

        # node=None signals completion to the frontend.
        await _send_ws({"type": "executing", "data": {"job_id": job_id, "node": None}})

        print(f"Job {job_id} completed successfully")

    except Exception as e:
        print(f"Error in job {job_id}: {str(e)}")
        jobs[job_id]["status"] = "failed"
        jobs[job_id]["error"] = str(e)
        try:
            await _send_ws({"type": "error", "data": {"job_id": job_id, "error": str(e)}})
        except Exception:
            pass
    finally:
        # Always release GPU memory and delete the temp upload dir, even on
        # failure — previously both only happened on the success path.
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass
        shutil.rmtree(target_dir, ignore_errors=True)
327
+
328
+
329
+ # -------------------------------------------------------------------------
330
+ # API Endpoints
331
+ # -------------------------------------------------------------------------
332
@app.on_event("startup")
async def startup_event():
    """Eagerly load the DA3 model when the server process boots."""
    load_model()
336
+
337
+
338
@app.get("/")
async def root():
    """Liveness probe: reports that the API process is up."""
    return {"status": "ok", "service": "Depth Anything 3 Inference API"}
342
+
343
+
344
@app.post("/inference")
async def create_inference(request: InferenceRequest, token: str = Query(...)):
    """
    Submit an inference job.

    Decodes the uploaded base64 images into a per-job temp directory, registers
    the job in the in-memory registry, and kicks off ``run_inference`` as a
    background task.

    Args:
        request: InferenceRequest containing images, client_id, options
        token: Authentication token (currently not validated, for compatibility)

    Returns:
        InferenceResponse with job_id

    Raises:
        HTTPException(400): if any image fails to decode or save.
    """
    job_id = str(uuid.uuid4())

    # Per-job temp directory for the uploaded images.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    target_dir = f"/tmp/da3_job_{job_id}_{timestamp}"
    target_dir_images = os.path.join(target_dir, "images")
    os.makedirs(target_dir_images, exist_ok=True)

    try:
        for img_data in request.images:
            # Security fix: client-supplied filenames are untrusted; basename()
            # strips any directory components so "../../etc/x" cannot escape
            # the job's images directory.
            safe_name = os.path.basename(img_data.filename)
            if not safe_name:
                raise ValueError(f"Invalid filename: {img_data.filename!r}")
            img_bytes = base64.b64decode(img_data.data)
            with open(os.path.join(target_dir_images, safe_name), "wb") as f:
                f.write(img_bytes)

        # Register the job before scheduling so polling never sees a gap.
        jobs[job_id] = {
            "status": "queued",
            "result": None,
            "created_at": datetime.now().isoformat(),
        }

        # Run inference in the background; the caller polls /result/{job_id}.
        asyncio.create_task(run_inference(job_id, target_dir, request.client_id, request.options))

        return InferenceResponse(job_id=job_id, status="queued")

    except Exception as e:
        shutil.rmtree(target_dir, ignore_errors=True)
        raise HTTPException(status_code=400, detail=f"Failed to process images: {str(e)}")
388
+
389
+
390
@app.get("/result/{job_id}")
async def get_result(job_id: str, token: str = Query(...)):
    """
    Get the inference result for a job.

    Args:
        job_id: Job ID
        token: Authentication token (currently not validated, for compatibility)

    Returns:
        ``{job_id: {"status": ...}}`` while pending, or
        ``{job_id: {"artifacts": ...}}`` once completed.

    Raises:
        HTTPException(404): unknown job_id.
        HTTPException(500): the job failed; detail carries the error message.
    """
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    status = job["status"]
    if status == "failed":
        raise HTTPException(status_code=500, detail=job.get("error", "Job failed"))
    if status == "completed":
        return {job_id: job["result"]}
    return {job_id: {"status": status}}
414
+
415
+
416
@app.websocket("/ws")
async def websocket_endpoint(
    websocket: WebSocket, clientId: str = Query(...), token: str = Query(...)
):
    """
    WebSocket endpoint for real-time progress updates.

    Registers the connection under ``clientId`` so background jobs can push
    progress, and echoes received text frames as a heartbeat.

    Args:
        websocket: WebSocket connection
        clientId: Client ID
        token: Authentication token (currently not validated, for compatibility)
    """
    await websocket.accept()
    websocket_connections[clientId] = websocket

    try:
        # Echo loop doubles as a keep-alive; exits when the client disconnects.
        while True:
            message = await websocket.receive_text()
            await websocket.send_text(message)
    except Exception as exc:
        print(f"WebSocket error for client {clientId}: {str(exc)}")
    finally:
        # Drop the registration regardless of how the loop ended.
        websocket_connections.pop(clientId, None)
442
+
443
+
444
@app.get("/history/{job_id}")
async def get_history(job_id: str, token: str = Query(...)):
    """
    Get job history — a straight alias for ``/result/{job_id}`` kept for
    client compatibility.

    Args:
        job_id: Job ID
        token: Authentication token

    Returns:
        Same payload as ``get_result``.
    """
    return await get_result(job_id, token)
457
+
458
+
459
+ # -------------------------------------------------------------------------
460
+ # Main
461
+ # -------------------------------------------------------------------------
462
if __name__ == "__main__":
    # Serve on all interfaces at the standard HF Spaces port (7860).
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
app.py CHANGED
@@ -13,89 +13,416 @@
13
  # limitations under the License.
14
 
15
  """
16
- Hugging Face Spaces App for Depth Anything 3.
17
 
18
- This app uses the @spaces.GPU decorator to dynamically allocate GPU resources
19
- for model inference on Hugging Face Spaces.
 
 
20
  """
21
 
22
  import os
23
- import spaces
 
 
 
 
 
 
 
 
 
 
 
24
  from depth_anything_3.app.gradio_app import DepthAnything3App
25
  from depth_anything_3.app.modules.model_inference import ModelInference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Apply @spaces.GPU decorator to run_inference method
28
- # This ensures GPU operations happen in isolated subprocess
29
- # Model loading and inference will occur in GPU subprocess, not main process
30
- original_run_inference = ModelInference.run_inference
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- @spaces.GPU(duration=120) # Request GPU for up to 120 seconds per inference
33
- def gpu_run_inference(self, *args, **kwargs):
34
  """
35
- GPU-accelerated inference with Spaces decorator.
36
-
37
- This function runs in a GPU subprocess where:
38
- - Model is loaded and moved to GPU (safe)
39
- - CUDA operations are allowed
40
- - All CUDA tensors are moved to CPU before return (for pickle safety)
 
 
 
41
  """
42
- return original_run_inference(self, *args, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # Replace the original method with the GPU-decorated version
45
- ModelInference.run_inference = gpu_run_inference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- # Initialize and launch the app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  if __name__ == "__main__":
49
- # Configure directories for Hugging Face Spaces
 
 
 
 
 
 
50
  model_dir = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")
51
  workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "workspace/gradio")
52
  gallery_dir = os.environ.get("DA3_GALLERY_DIR", "workspace/gallery")
53
-
54
  # Create directories if they don't exist
55
  os.makedirs(workspace_dir, exist_ok=True)
56
  os.makedirs(gallery_dir, exist_ok=True)
57
-
58
- # Initialize the app
59
  app = DepthAnything3App(
60
  model_dir=model_dir,
61
  workspace_dir=workspace_dir,
62
- gallery_dir=gallery_dir
63
  )
64
-
65
  # Check if examples directory exists
66
  examples_dir = os.path.join(workspace_dir, "examples")
67
  examples_exist = os.path.exists(examples_dir)
68
-
69
- # Check if caching is enabled via environment variable (default: True if examples exist)
70
- # Allow disabling via environment variable: DA3_CACHE_EXAMPLES=false
71
  cache_examples_env = os.environ.get("DA3_CACHE_EXAMPLES", "").lower()
72
  if cache_examples_env in ("false", "0", "no"):
73
  cache_examples = False
74
  elif cache_examples_env in ("true", "1", "yes"):
75
  cache_examples = True
76
  else:
77
- # Default: enable caching if examples directory exists
78
  cache_examples = examples_exist
79
-
80
- # Get cache_gs_tag from environment variable (default: "dl3dv")
81
  cache_gs_tag = os.environ.get("DA3_CACHE_GS_TAG", "dl3dv")
82
-
83
- # Launch with Spaces-friendly settings
84
- print("๐Ÿš€ Launching Depth Anything 3 on Hugging Face Spaces...")
85
- print(f"๐Ÿ“ฆ Model Directory: {model_dir}")
 
86
  print(f"๐Ÿ“ Workspace Directory: {workspace_dir}")
87
  print(f"๐Ÿ–ผ๏ธ Gallery Directory: {gallery_dir}")
88
  print(f"๐Ÿ’พ Cache Examples: {cache_examples}")
89
  if cache_examples:
90
  if cache_gs_tag:
91
- print(f"๐Ÿท๏ธ Cache GS Tag: '{cache_gs_tag}' (scenes matching this tag will use high-res + 3DGS)")
 
 
92
  else:
93
  print("๐Ÿท๏ธ Cache GS Tag: None (all scenes will use low-res only)")
94
-
95
- # Pre-cache examples if requested
96
  if cache_examples:
97
  print("\n" + "=" * 60)
98
- print("Pre-caching mode enabled")
99
  if cache_gs_tag:
100
  print(f"Scenes containing '{cache_gs_tag}' will use HIGH-RES + 3DGS")
101
  print("Other scenes will use LOW-RES only")
@@ -112,11 +439,10 @@ if __name__ == "__main__":
112
  gs_trj_mode="smooth",
113
  gs_video_quality="low",
114
  )
115
-
116
- # Launch with minimal, Spaces-compatible configuration
117
- # Some parameters may cause routing issues, so we use minimal config
118
  app.launch(
119
- host="0.0.0.0", # Required for Spaces
120
- port=7860, # Standard Gradio port
121
- share=False # Not needed on Spaces
122
  )
 
13
  # limitations under the License.
14
 
15
  """
16
+ Depth Anything 3 Frontend App (Gradio UI) with remote backend inference via WebSocket/HTTP.
17
 
18
+ - Frontend responsibilities remain unchanged (UI, gallery, export glb/3DGS, caching examples)
19
+ - Model inference is delegated to a remote backend specified by DA3_HOST
20
+ - Communication helpers (_open_ws/_submit_inference/_get_result) are defined here (app.py),
21
+ similar to VGGT repo style.
22
  """
23
 
24
import os
import glob
import json
import uuid
import base64
import io
import zipfile
from typing import Any, Dict, Optional, Tuple

import numpy as np
import requests
import torch
import websocket

from depth_anything_3.app.gradio_app import DepthAnything3App
from depth_anything_3.app.modules.model_inference import ModelInference
from depth_anything_3.specs import Gaussians, Prediction
+
41
# -------------------------------------------------------------------------
# Remote Backend Host (must be set)
# -------------------------------------------------------------------------
# Address of the remote DA3 inference backend, expected format "ip:port".
DA3_HOST = os.getenv("DA3_HOST")
45
+
46
+
47
+ # -------------------------------------------------------------------------
48
+ # Remote service communication functions (VGGT style)
49
+ # -------------------------------------------------------------------------
50
def _open_ws(client_id: str, token: str):
    """Open a WebSocket to the remote DA3 backend for progress updates.

    Raises:
        RuntimeError: if DA3_HOST is unset.
    """
    if not DA3_HOST:
        raise RuntimeError(
            "DA3_HOST is not set. Please set env DA3_HOST=ip:port for remote inference."
        )
    conn = websocket.WebSocket()
    conn.connect(f"ws://{DA3_HOST}/ws?clientId={client_id}&token={token}", timeout=1800)
    return conn
59
+
60
+
61
def _submit_inference(target_dir: str, client_id: str, token: str, options: Dict[str, Any]) -> str:
    """Submit an inference job to the remote DA3 service.

    Base64-encodes every file under ``<target_dir>/images`` and POSTs them to
    the backend's /inference endpoint.

    Returns:
        The backend-assigned job id.

    Raises:
        RuntimeError: DA3_HOST unset, HTTP error, or malformed response.
        ValueError: no images found under target_dir.
    """
    if not DA3_HOST:
        raise RuntimeError(
            "DA3_HOST is not set. Please set env DA3_HOST=ip:port for remote inference."
        )

    paths = sorted(glob.glob(os.path.join(target_dir, "images", "*")))
    if not paths:
        raise ValueError("No images found. Check your upload.")

    # Encode each image as base64 for the JSON payload.
    images_data = []
    for img_path in paths:
        with open(img_path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode("utf-8")
        images_data.append({"filename": os.path.basename(img_path), "data": encoded})

    payload = {
        "images": images_data,
        "client_id": client_id,
        "options": options,
    }

    resp = requests.post(f"http://{DA3_HOST}/inference?token={token}", json=payload, timeout=1800)
    if resp.status_code != 200:
        raise RuntimeError(f"DA3 service /inference error: {resp.text}")

    data = resp.json()
    if "job_id" not in data:
        raise RuntimeError(f"/inference response missing job_id: {data}")

    return data["job_id"]
98
+
99
 
100
def _get_result(job_id: str, token: str) -> Dict[str, Any]:
    """Fetch the inference result for *job_id* from the remote DA3 service.

    Raises:
        RuntimeError: if DA3_HOST is unset.
        requests.HTTPError: on a non-2xx response (via raise_for_status).
    """
    if not DA3_HOST:
        raise RuntimeError(
            "DA3_HOST is not set. Please set env DA3_HOST=ip:port for remote inference."
        )
    resp = requests.get(f"http://{DA3_HOST}/result/{job_id}?token={token}", timeout=1800)
    resp.raise_for_status()
    return resp.json()
109
+
110
+
111
+ def _deserialize_np(b64_str: str) -> Any:
112
+ """Deserialize base64-encoded numpy array saved via np.save into Python object"""
113
+ arr_bytes = base64.b64decode(b64_str)
114
+ return np.load(io.BytesIO(arr_bytes), allow_pickle=True)
115
 
116
+
117
def _build_prediction_from_remote(preds: Dict[str, Any]) -> Prediction:
    """
    Build a lightweight Prediction object from the remote 'predictions' dictionary.

    Fix: the original body referenced ``torch`` and ``Gaussians`` that were
    never imported in this file, so any payload containing gaussians raised
    NameError (imports are now provided at the top of the file).

    Expected keys (base64 npy unless otherwise specified):
        - depths: <b64npy> (N,H,W)
        - conf: <b64npy> (N,H,W) [required by export_to_glb]
        - extrinsics: <b64npy> (N,4,4)
        - intrinsics: <b64npy> (N,3,3)
        - processed_images: <b64npy> (N,H,W,3) uint8 [required by export_to_glb]
        - sky_mask: <b64npy> (optional)
        - gaussians: {means, scales, rotations, harmonics, opacities} (optional, each b64npy)
    """

    def _field(container: Dict[str, Any], key: str):
        # Decode a base64-npy field when present; None-safe.
        raw = container.get(key)
        return _deserialize_np(raw) if raw is not None else None

    depth = _field(preds, "depths")
    conf = _field(preds, "conf")
    extrinsics = _field(preds, "extrinsics")
    intrinsics = _field(preds, "intrinsics")
    processed_images = _field(preds, "processed_images")
    sky_mask = _field(preds, "sky_mask")

    # export_to_glb requires confidences; fall back to all-ones if missing.
    if conf is None and depth is not None:
        conf = np.ones_like(depth, dtype=np.float32)

    gaussians_obj: Optional[Gaussians] = None
    gdict = preds.get("gaussians")
    if gdict is not None:

        def _tensor(key: str):
            # Gaussians fields are stored as CPU torch tensors.
            arr = _field(gdict, key)
            return torch.from_numpy(arr) if arr is not None else None

        gaussians_obj = Gaussians(
            means=_tensor("means"),
            scales=_tensor("scales"),
            rotations=_tensor("rotations"),
            harmonics=_tensor("harmonics"),
            opacities=_tensor("opacities"),
        )

    return Prediction(
        depth=depth,
        is_metric=1,
        sky=sky_mask,
        conf=conf,
        extrinsics=extrinsics,
        intrinsics=intrinsics,
        processed_images=processed_images,
        gaussians=gaussians_obj,
        aux={},  # optional aux dict
        scale_factor=None,
    )
190
 
191
+
192
+ # -------------------------------------------------------------------------
193
+ # Monkey-patch ModelInference.run_inference to use remote backend
194
+ # -------------------------------------------------------------------------
195
+ def remote_run_inference(
196
+ self: ModelInference,
197
+ target_dir: str,
198
+ filter_black_bg: bool = False,
199
+ filter_white_bg: bool = False,
200
+ process_res_method: str = "upper_bound_resize",
201
+ show_camera: bool = True,
202
+ selected_first_frame: Optional[str] = None,
203
+ save_percentage: float = 30.0,
204
+ num_max_points: int = 1_000_000,
205
+ infer_gs: bool = False,
206
+ gs_trj_mode: str = "extend",
207
+ gs_video_quality: str = "high",
208
+ ) -> Tuple[Any, Dict[int, Dict[str, Any]]]:
209
+ """
210
+ Remote inference via DA3_HOST. Frontend ONLY consumes artifacts returned by backend:
211
+ - Writes scene.glb, depth_vis/, predictions.npz, (optional) gs_video.mp4 into target_dir
212
+ - Builds processed_data dict from files
213
+ - Returns (prediction, processed_data) where prediction is reconstructed from predictions.npz
214
+ """
215
+ if not DA3_HOST:
216
+ raise RuntimeError(
217
+ "DA3_HOST is not set. Please set env DA3_HOST=ip:port for remote inference."
218
+ )
219
+
220
+ # Validate images exist
221
+ image_folder_path = os.path.join(target_dir, "images")
222
+ all_image_paths = sorted(glob.glob(os.path.join(image_folder_path, "*")))
223
+ if len(all_image_paths) == 0:
224
+ raise ValueError("No images found. Check your upload.")
225
+
226
+ # Compose options to send to backend (no export on frontend)
227
+ options = {
228
+ "process_res_method": process_res_method,
229
+ "selected_first_frame": selected_first_frame or "",
230
+ "infer_gs": bool(infer_gs),
231
+ "conf_thresh_percentile": float(save_percentage),
232
+ "num_max_points": int(num_max_points),
233
+ "show_cameras": bool(show_camera),
234
+ "gs_trj_mode": gs_trj_mode,
235
+ "gs_video_quality": gs_video_quality,
236
+ }
237
+
238
+ # IDs and WebSocket
239
+ client_id = str(uuid.uuid4())
240
+ token = str(uuid.uuid4())
241
+ ws = _open_ws(client_id, token)
242
+
243
+ # Submit inference job
244
+ job_id = _submit_inference(target_dir, client_id, token, options)
245
+
246
+ # Monitor progress via WebSocket
247
+ ws.settimeout(180)
248
+ try:
249
+ while True:
250
+ out = ws.recv()
251
+ if isinstance(out, (bytes, bytearray)):
252
+ continue
253
+ msg = json.loads(out)
254
+ if msg.get("type") == "executing":
255
+ data = msg.get("data", {})
256
+ if data.get("job_id") != job_id:
257
+ continue
258
+ node = data.get("node")
259
+ if node is None:
260
+ # Job complete
261
+ break
262
+ except Exception as e:
263
+ print(f"WebSocket error: {e}")
264
+ finally:
265
+ try:
266
+ ws.close()
267
+ except Exception:
268
+ pass
269
+
270
+ # Fetch final result
271
+ result = _get_result(job_id, token)
272
+ if job_id not in result:
273
+ raise RuntimeError(f"Remote result missing job_id entry: {result}")
274
+ job_entry = result[job_id]
275
+ if job_entry.get("status") != "completed":
276
+ raise RuntimeError(f"Remote job not completed or failed: {job_entry}")
277
+
278
+ artifacts = job_entry.get("artifacts", {})
279
+ if not artifacts:
280
+ raise RuntimeError(f"No artifacts returned from backend for job {job_id}")
281
+
282
+ # Write artifacts to target_dir
283
+ os.makedirs(target_dir, exist_ok=True)
284
+
285
+ # scene.glb
286
+ glb_b64 = artifacts.get("glb")
287
+ if glb_b64:
288
+ with open(os.path.join(target_dir, "scene.glb"), "wb") as f:
289
+ f.write(base64.b64decode(glb_b64))
290
+
291
+ # depth_vis
292
+ depth_vis_b64 = artifacts.get("depth_vis_zip")
293
+ if depth_vis_b64:
294
+ depth_vis_dir = os.path.join(target_dir, "depth_vis")
295
+ os.makedirs(depth_vis_dir, exist_ok=True)
296
+ zip_bytes = base64.b64decode(depth_vis_b64)
297
+ with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as zf:
298
+ zf.extractall(depth_vis_dir)
299
+
300
+ # predictions.npz
301
+ pred_npz_b64 = artifacts.get("predictions_npz")
302
+ prediction: Any = None
303
+ if pred_npz_b64:
304
+ npz_path = os.path.join(target_dir, "predictions.npz")
305
+ with open(npz_path, "wb") as f:
306
+ f.write(base64.b64decode(pred_npz_b64))
307
+ try:
308
+ loaded = np.load(npz_path, allow_pickle=True)
309
+ # reconstruct Prediction dataclass from npz content
310
+ images = loaded["images"] if "images" in loaded.files else None
311
+ depths = loaded["depths"] if "depths" in loaded.files else None
312
+ conf = loaded["conf"] if "conf" in loaded.files else None
313
+ extrinsics = loaded["extrinsics"] if "extrinsics" in loaded.files else None
314
+ intrinsics = loaded["intrinsics"] if "intrinsics" in loaded.files else None
315
+
316
+ prediction = Prediction(
317
+ depth=depths,
318
+ is_metric=1,
319
+ sky=None,
320
+ conf=(
321
+ conf
322
+ if conf is not None
323
+ else (np.ones_like(depths, dtype=np.float32) if depths is not None else None)
324
+ ),
325
+ extrinsics=extrinsics,
326
+ intrinsics=intrinsics,
327
+ processed_images=images,
328
+ gaussians=None,
329
+ aux={},
330
+ scale_factor=None,
331
+ )
332
+ except Exception as e:
333
+ print(f"Failed to reconstruct Prediction from predictions.npz: {e}")
334
+ prediction = Prediction(
335
+ depth=None,
336
+ is_metric=1,
337
+ sky=None,
338
+ conf=None,
339
+ extrinsics=None,
340
+ intrinsics=None,
341
+ processed_images=None,
342
+ gaussians=None,
343
+ aux={},
344
+ scale_factor=None,
345
+ )
346
+
347
+ # Optional GS video
348
+ gs_video_b64 = artifacts.get("gs_video")
349
+ if gs_video_b64:
350
+ gs_dir = os.path.join(target_dir, "gs_video")
351
+ os.makedirs(gs_dir, exist_ok=True)
352
+ with open(os.path.join(gs_dir, "gs_video.mp4"), "wb") as f:
353
+ f.write(base64.b64decode(gs_video_b64))
354
+
355
+ # Build processed_data from files (depth_vis + optional images from predictions.npz)
356
+ processed_data = self._process_results(target_dir, prediction, all_image_paths)
357
+
358
+ return prediction, processed_data
359
+
360
+
361
+ # Replace original ModelInference.run_inference with remote version
362
+ ModelInference.run_inference = remote_run_inference
363
+
364
+
365
+ # -------------------------------------------------------------------------
366
+ # Initialize and launch the frontend app (unchanged UI behavior)
367
+ # -------------------------------------------------------------------------
368
  if __name__ == "__main__":
369
+ # Enforce remote backend configuration
370
+ if not DA3_HOST:
371
+ raise RuntimeError(
372
+ "DA3_HOST is not set. Please export DA3_HOST=ip:port to use remote backend inference."
373
+ )
374
+
375
+ # Configure directories for frontend workspace/gallery
376
  model_dir = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")
377
  workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "workspace/gradio")
378
  gallery_dir = os.environ.get("DA3_GALLERY_DIR", "workspace/gallery")
379
+
380
  # Create directories if they don't exist
381
  os.makedirs(workspace_dir, exist_ok=True)
382
  os.makedirs(gallery_dir, exist_ok=True)
383
+
384
+ # Initialize the app (frontend UI)
385
  app = DepthAnything3App(
386
  model_dir=model_dir,
387
  workspace_dir=workspace_dir,
388
+ gallery_dir=gallery_dir,
389
  )
390
+
391
  # Check if examples directory exists
392
  examples_dir = os.path.join(workspace_dir, "examples")
393
  examples_exist = os.path.exists(examples_dir)
394
+
395
+ # Check caching (default: True if examples exist)
 
396
  cache_examples_env = os.environ.get("DA3_CACHE_EXAMPLES", "").lower()
397
  if cache_examples_env in ("false", "0", "no"):
398
  cache_examples = False
399
  elif cache_examples_env in ("true", "1", "yes"):
400
  cache_examples = True
401
  else:
 
402
  cache_examples = examples_exist
403
+
404
+ # Cache tag for 3DGS
405
  cache_gs_tag = os.environ.get("DA3_CACHE_GS_TAG", "dl3dv")
406
+
407
+ # Launch logs
408
+ print("๐Ÿš€ Launching Depth Anything 3 Frontend (remote backend mode)...")
409
+ print(f"๐ŸŒ DA3_HOST (backend): {DA3_HOST}")
410
+ print(f"๐Ÿ“ฆ Model Directory (frontend env only): {model_dir}")
411
  print(f"๐Ÿ“ Workspace Directory: {workspace_dir}")
412
  print(f"๐Ÿ–ผ๏ธ Gallery Directory: {gallery_dir}")
413
  print(f"๐Ÿ’พ Cache Examples: {cache_examples}")
414
  if cache_examples:
415
  if cache_gs_tag:
416
+ print(
417
+ f"๐Ÿท๏ธ Cache GS Tag: '{cache_gs_tag}' (scenes matching this tag will use high-res + 3DGS)"
418
+ )
419
  else:
420
  print("๐Ÿท๏ธ Cache GS Tag: None (all scenes will use low-res only)")
421
+
422
+ # Pre-cache examples (requests inference from remote backend; artifacts still stored locally)
423
  if cache_examples:
424
  print("\n" + "=" * 60)
425
+ print("Pre-caching mode enabled (remote backend inference)")
426
  if cache_gs_tag:
427
  print(f"Scenes containing '{cache_gs_tag}' will use HIGH-RES + 3DGS")
428
  print("Other scenes will use LOW-RES only")
 
439
  gs_trj_mode="smooth",
440
  gs_video_quality="low",
441
  )
442
+
443
+ # Launch Gradio frontend (minimal, Spaces-compatible configuration)
 
444
  app.launch(
445
+ host="0.0.0.0",
446
+ port=7860,
447
+ share=False,
448
  )
pyproject.toml CHANGED
@@ -14,14 +14,14 @@ authors = [{ name = "Your Name" }]
14
  dependencies = [
15
  "pre-commit",
16
  "trimesh",
17
- "torch>=2",
18
- "torchvision",
19
  "einops",
20
  "huggingface_hub",
21
  "imageio",
22
  "numpy<2",
23
  "opencv-python",
24
- "xformers",
25
  "open3d",
26
  "fastapi",
27
  "uvicorn",
 
14
  dependencies = [
15
  "pre-commit",
16
  "trimesh",
17
+ # "torch>=2",
18
+ # "torchvision",
19
  "einops",
20
  "huggingface_hub",
21
  "imageio",
22
  "numpy<2",
23
  "opencv-python",
24
+ # "xformers",
25
  "open3d",
26
  "fastapi",
27
  "uvicorn",
requirements.txt CHANGED
@@ -1,8 +1,6 @@
1
  # Core dependencies
2
  torch>=2.0.0
3
- torchvision
4
  einops
5
- huggingface_hub
6
  numpy<2
7
  opencv-python
8
 
@@ -10,12 +8,8 @@ opencv-python
10
  gradio>=5.0.0
11
  spaces
12
  pillow>=9.0
13
- evo
14
 
15
  # 3D and visualization
16
- trimesh
17
- open3d
18
- plyfile
19
 
20
  # Image processing
21
  imageio
@@ -23,26 +17,11 @@ pillow_heif
23
  safetensors
24
 
25
  # Video processing
26
- moviepy==1.0.3
27
 
28
  # Math and geometry
29
- e3nn
30
 
31
  # Utilities
32
  requests
 
33
  omegaconf
34
  typer>=0.9.0
35
-
36
- # Web frameworks (if using API features)
37
- fastapi
38
- uvicorn
39
-
40
- # xformers - commented out due to potential build issues on Spaces
41
- # If needed, uncomment and use a version compatible with your PyTorch/CUDA:
42
- # xformers==0.0.22
43
- # Or install after deployment: pip install xformers --no-deps
44
-
45
- # 3D Gaussian Splatting
46
- # Note: This requires CUDA during build. If build fails on Spaces, see alternative solutions.
47
- gsplat @ https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.3/gsplat-1.5.3+pt24cu124-cp310-cp310-linux_x86_64.whl
48
-
 
1
  # Core dependencies
2
  torch>=2.0.0
 
3
  einops
 
4
  numpy<2
5
  opencv-python
6
 
 
8
  gradio>=5.0.0
9
  spaces
10
  pillow>=9.0
 
11
 
12
  # 3D and visualization
 
 
 
13
 
14
  # Image processing
15
  imageio
 
17
  safetensors
18
 
19
  # Video processing
 
20
 
21
  # Math and geometry
 
22
 
23
  # Utilities
24
  requests
25
+ websocket-client
26
  omegaconf
27
  typer>=0.9.0