divinixx committed on
Commit
139033f
·
1 Parent(s): fab6ee2

Skip MiDaS depth model on CPU for faster performance

Browse files
Files changed (2) hide show
  1. app/models/loader.py +22 -5
  2. app/services/pipeline.py +26 -12
app/models/loader.py CHANGED
@@ -3,11 +3,15 @@ Model loader — loads BLIP-2, YOLOv8, and MiDaS at startup.
3
  """
4
 
5
  import logging
 
6
  import torch
7
  from transformers import Blip2Processor, Blip2ForConditionalGeneration
8
  from ultralytics import YOLO
9
  from pathlib import Path
10
 
 
 
 
11
  log = logging.getLogger(__name__)
12
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
13
 
@@ -43,11 +47,24 @@ def load_models() -> dict:
43
  yolo_model.to(device)
44
  log.info("YOLOv8 loaded.")
45
 
46
- log.info("Loading MiDaS (small)...")
47
- midas_model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small", trust_repo=True)
48
- midas_model.to(device)
49
- midas_model.eval()
50
- log.info("MiDaS loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  log.info("All models ready.")
53
  return {
 
3
  """
4
 
5
  import logging
6
+ import os
7
  import torch
8
  from transformers import Blip2Processor, Blip2ForConditionalGeneration
9
  from ultralytics import YOLO
10
  from pathlib import Path
11
 
12
+ # Redirect the torch hub cache to a writable location (needed to download MiDaS weights)
13
+ os.environ['TORCH_HOME'] = '/tmp/torch_cache'
14
+
15
  log = logging.getLogger(__name__)
16
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
17
 
 
47
  yolo_model.to(device)
48
  log.info("YOLOv8 loaded.")
49
 
50
+ # Skip depth estimation on CPU for faster performance
51
+ # Depth is used for prioritization but not critical for navigation
52
+ if device.type == "cpu":
53
+ log.info("Skipping MiDaS on CPU for better performance...")
54
+ midas_model = None
55
+ else:
56
+ log.info("Loading MiDaS (small)...")
57
+ torch.hub.set_dir('/tmp/torch_cache')
58
+ midas_model = torch.hub.load(
59
+ "intel-isl/MiDaS",
60
+ "MiDaS_small",
61
+ trust_repo=True,
62
+ force_reload=False,
63
+ skip_validation=True
64
+ )
65
+ midas_model.to(device)
66
+ midas_model.eval()
67
+ log.info("MiDaS loaded.")
68
 
69
  log.info("All models ready.")
70
  return {
app/services/pipeline.py CHANGED
@@ -108,22 +108,30 @@ def run_pipeline_frame_data(frame: np.ndarray, models: Dict) -> Dict:
108
  blip_model = models.get("blip2_model")
109
  blip_processor = models.get("blip2_processor")
110
  yolo_model = models.get("yolo_model")
111
- midas_model = models.get("midas_model")
112
  device = models.get("device")
113
 
114
- if any(v is None for v in [blip_model, blip_processor, yolo_model, midas_model, device]):
115
- missing = [
116
- k for k, v in [
117
- ("blip2_model", blip_model), ("blip2_processor", blip_processor),
118
- ("yolo_model", yolo_model), ("midas_model", midas_model), ("device", device),
119
- ] if v is None
120
- ]
121
- raise RuntimeError(f"Models not fully loaded. Missing: {missing}")
 
 
122
 
123
  # --- Run each model stage ---
124
  desc, caption_latency = description(frame, blip_model, blip_processor, device)
125
  dets, detection_latency = detect_objects(frame, yolo_model)
126
- depth_map, depth_latency = estimate_depth(frame, midas_model, device)
 
 
 
 
 
 
127
 
128
  main_feature = extract_main_feature(dets)
129
  unique_objects = _unique_ordered(dets["class_names"])
@@ -133,8 +141,14 @@ def run_pipeline_frame_data(frame: np.ndarray, models: Dict) -> Dict:
133
  detection_details = []
134
  for box, label, conf in zip(dets["boxes"], dets["class_names"], dets["confidences"]):
135
  x1, y1, x2, y2 = box
136
- region = depth_map[max(0, y1):min(h, y2), max(0, x1):min(w, x2)]
137
- median_depth = float(np.median(region)) if region.size > 0 else None
 
 
 
 
 
 
138
  detection_details.append({
139
  "label": label,
140
  "confidence": round(float(conf), 3),
 
108
  blip_model = models.get("blip2_model")
109
  blip_processor = models.get("blip2_processor")
110
  yolo_model = models.get("yolo_model")
111
+ midas_model = models.get("midas_model") # Can be None on CPU
112
  device = models.get("device")
113
 
114
+ # Check required models (MiDaS is optional)
115
+ required_models = [
116
+ ("blip2_model", blip_model),
117
+ ("blip2_processor", blip_processor),
118
+ ("yolo_model", yolo_model),
119
+ ("device", device),
120
+ ]
121
+ missing = [k for k, v in required_models if v is None]
122
+ if missing:
123
+ raise RuntimeError(f"Required models not loaded. Missing: {missing}")
124
 
125
  # --- Run each model stage ---
126
  desc, caption_latency = description(frame, blip_model, blip_processor, device)
127
  dets, detection_latency = detect_objects(frame, yolo_model)
128
+
129
+ # Skip depth estimation if MiDaS not loaded (CPU optimization)
130
+ if midas_model is not None:
131
+ depth_map, depth_latency = estimate_depth(frame, midas_model, device)
132
+ else:
133
+ depth_map = None
134
+ depth_latency = 0.0
135
 
136
  main_feature = extract_main_feature(dets)
137
  unique_objects = _unique_ordered(dets["class_names"])
 
141
  detection_details = []
142
  for box, label, conf in zip(dets["boxes"], dets["class_names"], dets["confidences"]):
143
  x1, y1, x2, y2 = box
144
+
145
+ # Calculate depth only if depth_map available
146
+ if depth_map is not None:
147
+ region = depth_map[max(0, y1):min(h, y2), max(0, x1):min(w, x2)]
148
+ median_depth = float(np.median(region)) if region.size > 0 else None
149
+ else:
150
+ median_depth = None
151
+
152
  detection_details.append({
153
  "label": label,
154
  "confidence": round(float(conf), 3),