Spaces:

eduardo4547
/

hyper-reality-sam2-gpu

Running on Zero

App Files Community

eduardo4547 committed on Apr 28

Commit

96e3249

verified ·

1 Parent(s): cbbef9c

Upload 4 files

Browse files

Files changed (1) hide show

app.py +38 -23

app.py CHANGED Viewed

@@ -5,7 +5,16 @@ import torch
 from pathlib import Path
 from huggingface_hub import hf_hub_download
 from PIL import Image
-import spaces  # <-- Importante para Hugging Face ZeroGPU
 # --- IMPORTACIONES DE MODELOS ---
 from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
@@ -21,13 +30,12 @@ SAM2_CONFIG = "configs/sam2.1/sam2.1_hiera_b+.yaml"
 # GroundingDINO
 GDINO_ID = "IDEA-Research/grounding-dino-base"
-DEVICE = "cuda"
 # Variables globales para Lazy Loading (ZeroGPU)
 sam2_predictor = None
 gdino_model = None
-clip_model = None
-clip_processor = None
 COLOR_PALETTE = [
     (0, 255, 255, 150),  # Cian (queda muy bien para resaltar)
@@ -69,11 +77,12 @@ def segmentar_con_dino_y_sam(imagen: Image.Image, texto: str, box_threshold: flo
     # 1. LAZY LOADING: Inicializar modelos en la GPU la primera vez
     if sam2_predictor is None:
-        print("Inicializando GroundingDINO y SAM 2.1 en GPU...")
-        torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
-        if torch.cuda.get_device_properties(0).major >= 8:
-            torch.backends.cuda.matmul.allow_tf32 = True
-            torch.backends.cudnn.allow_tf32 = True
         # Cargar SAM 2.1 en modo Predictor (para cajas), no AutomaticMaskGenerator
         checkpoint_path = download_sam_checkpoint()
@@ -98,21 +107,26 @@ def segmentar_con_dino_y_sam(imagen: Image.Image, texto: str, box_threshold: flo
     outputs = gdino_model(**inputs)
     # Extraer las cajas con un umbral de confianza
-    results = gdino_processor.post_process_grounded_object_detection(
-        outputs,
-        inputs.input_ids,
-        box_threshold=box_threshold,
-        text_threshold=0.25,
-        target_sizes=[imagen.size[::-1]] # (alto, ancho)
-    )[0]
-    cajas = results["boxes"] # Tensor con coordenadas [x1, y1, x2, y2]
-    etiquetas = results["labels"]
-    scores = results["scores"]
     if len(cajas) == 0:
         return imagen, f"No se encontró nada para '{texto}' con el umbral actual ({box_threshold}). Intenta bajarlo."
     # 3. SAM 2.1: Segmentar dentro de las cajas encontradas
     sam2_predictor.set_image(imagen_np)
@@ -127,8 +141,9 @@ def segmentar_con_dino_y_sam(imagen: Image.Image, texto: str, box_threshold: flo
     )
     # Las máscaras de SAM tienen forma (N, 1, H, W). Las aplanamos a (N, H, W)
-    masks = masks.squeeze(1)
     # 4. SUPERPONER MÁSCARAS
     resultado_img = create_mask_overlay(imagen, masks)

 from pathlib import Path
 from huggingface_hub import hf_hub_download
 from PIL import Image
+# --- GESTIÓN DE ENTORNO (Hugging Face Spaces vs. Local) ---
+try:
+    import spaces
+except ImportError:
+    # Si 'spaces' no existe, creamos un decorador falso que no hace nada.
+    # Esto permite que el código se ejecute localmente sin el decorador @spaces.GPU.
+    class DummySpaces:
+        def GPU(self, fn): return fn
+    spaces = DummySpaces()
 # --- IMPORTACIONES DE MODELOS ---
 from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
 # GroundingDINO
 GDINO_ID = "IDEA-Research/grounding-dino-base"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # Variables globales para Lazy Loading (ZeroGPU)
 sam2_predictor = None
 gdino_model = None
+gdino_processor = None
 COLOR_PALETTE = [
     (0, 255, 255, 150),  # Cian (queda muy bien para resaltar)
     # 1. LAZY LOADING: Inicializar modelos en la GPU la primera vez
     if sam2_predictor is None:
+        print(f"Inicializando modelos en dispositivo: {DEVICE.upper()}...")
+        if DEVICE == "cuda":
+            torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
+            if torch.cuda.get_device_properties(0).major >= 8:
+                torch.backends.cuda.matmul.allow_tf32 = True
+                torch.backends.cudnn.allow_tf32 = True
         # Cargar SAM 2.1 en modo Predictor (para cajas), no AutomaticMaskGenerator
         checkpoint_path = download_sam_checkpoint()
     outputs = gdino_model(**inputs)
     # Extraer las cajas con un umbral de confianza
+    # Modificación para compatibilidad: filtramos manualmente en lugar de pasar el umbral a la función.
+    results = gdino_processor.post_process_grounded_object_detection(outputs, inputs.input_ids, target_sizes=[imagen.size[::-1]])[0]
+    # Filtrar los resultados basados en el umbral de la caja (box_threshold)
+    boxes_filt = []
+    scores_filt = []
+    labels_filt = []
+    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+        if score > box_threshold:
+            boxes_filt.append(box)
+            scores_filt.append(score)
+            labels_filt.append(label)
+    # Convertir las listas filtradas de nuevo a tensores
+    cajas = torch.stack(boxes_filt) if boxes_filt else torch.empty((0, 4))
+    scores = torch.stack(scores_filt) if scores_filt else torch.empty((0,))
+    etiquetas = labels_filt
     if len(cajas) == 0:
         return imagen, f"No se encontró nada para '{texto}' con el umbral actual ({box_threshold}). Intenta bajarlo."
     # 3. SAM 2.1: Segmentar dentro de las cajas encontradas
     sam2_predictor.set_image(imagen_np)
     )
     # Las máscaras de SAM tienen forma (N, 1, H, W). Las aplanamos a (N, H, W)
+    # Solo hacemos squeeze si hay máscaras, para evitar errores con tensores vacíos.
+    if masks.ndim == 4 and masks.shape[1] == 1:
+        masks = masks.squeeze(1)
     # 4. SUPERPONER MÁSCARAS
     resultado_img = create_mask_overlay(imagen, masks)