Spaces:
Running on Zero
Running on Zero
Upload 4 files
Browse files
app.py
CHANGED
|
@@ -5,7 +5,16 @@ import torch
|
|
| 5 |
from pathlib import Path
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
from PIL import Image
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# --- IMPORTACIONES DE MODELOS ---
|
| 11 |
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
|
|
@@ -21,13 +30,12 @@ SAM2_CONFIG = "configs/sam2.1/sam2.1_hiera_b+.yaml"
|
|
| 21 |
# GroundingDINO
|
| 22 |
GDINO_ID = "IDEA-Research/grounding-dino-base"
|
| 23 |
|
| 24 |
-
DEVICE = "cuda"
|
| 25 |
|
| 26 |
# Variables globales para Lazy Loading (ZeroGPU)
|
| 27 |
sam2_predictor = None
|
| 28 |
gdino_model = None
|
| 29 |
-
|
| 30 |
-
clip_processor = None
|
| 31 |
|
| 32 |
COLOR_PALETTE = [
|
| 33 |
(0, 255, 255, 150), # Cian (queda muy bien para resaltar)
|
|
@@ -69,11 +77,12 @@ def segmentar_con_dino_y_sam(imagen: Image.Image, texto: str, box_threshold: flo
|
|
| 69 |
|
| 70 |
# 1. LAZY LOADING: Inicializar modelos en la GPU la primera vez
|
| 71 |
if sam2_predictor is None:
|
| 72 |
-
print("Inicializando
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
torch.
|
| 76 |
-
|
|
|
|
| 77 |
|
| 78 |
# Cargar SAM 2.1 en modo Predictor (para cajas), no AutomaticMaskGenerator
|
| 79 |
checkpoint_path = download_sam_checkpoint()
|
|
@@ -98,21 +107,26 @@ def segmentar_con_dino_y_sam(imagen: Image.Image, texto: str, box_threshold: flo
|
|
| 98 |
outputs = gdino_model(**inputs)
|
| 99 |
|
| 100 |
# Extraer las cajas con un umbral de confianza
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
if len(cajas) == 0:
|
| 114 |
return imagen, f"No se encontró nada para '{texto}' con el umbral actual ({box_threshold}). Intenta bajarlo."
|
| 115 |
-
|
| 116 |
# 3. SAM 2.1: Segmentar dentro de las cajas encontradas
|
| 117 |
sam2_predictor.set_image(imagen_np)
|
| 118 |
|
|
@@ -127,8 +141,9 @@ def segmentar_con_dino_y_sam(imagen: Image.Image, texto: str, box_threshold: flo
|
|
| 127 |
)
|
| 128 |
|
| 129 |
# Las máscaras de SAM tienen forma (N, 1, H, W). Las aplanamos a (N, H, W)
|
| 130 |
-
|
| 131 |
-
|
|
|
|
| 132 |
# 4. SUPERPONER MÁSCARAS
|
| 133 |
resultado_img = create_mask_overlay(imagen, masks)
|
| 134 |
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
from PIL import Image
|
| 8 |
+
|
| 9 |
+
# --- GESTIÓN DE ENTORNO (Hugging Face Spaces vs. Local) ---
|
| 10 |
+
try:
|
| 11 |
+
import spaces
|
| 12 |
+
except ImportError:
|
| 13 |
+
# Si 'spaces' no existe, creamos un decorador falso que no hace nada.
|
| 14 |
+
# Esto permite que el código se ejecute localmente sin el decorador @spaces.GPU.
|
| 15 |
+
class DummySpaces:
|
| 16 |
+
def GPU(self, fn): return fn
|
| 17 |
+
spaces = DummySpaces()
|
| 18 |
|
| 19 |
# --- IMPORTACIONES DE MODELOS ---
|
| 20 |
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
|
|
|
|
| 30 |
# GroundingDINO
|
| 31 |
GDINO_ID = "IDEA-Research/grounding-dino-base"
|
| 32 |
|
| 33 |
+
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 34 |
|
| 35 |
# Variables globales para Lazy Loading (ZeroGPU)
|
| 36 |
sam2_predictor = None
|
| 37 |
gdino_model = None
|
| 38 |
+
gdino_processor = None
|
|
|
|
| 39 |
|
| 40 |
COLOR_PALETTE = [
|
| 41 |
(0, 255, 255, 150), # Cian (queda muy bien para resaltar)
|
|
|
|
| 77 |
|
| 78 |
# 1. LAZY LOADING: Inicializar modelos en la GPU la primera vez
|
| 79 |
if sam2_predictor is None:
|
| 80 |
+
print(f"Inicializando modelos en dispositivo: {DEVICE.upper()}...")
|
| 81 |
+
if DEVICE == "cuda":
|
| 82 |
+
torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
|
| 83 |
+
if torch.cuda.get_device_properties(0).major >= 8:
|
| 84 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 85 |
+
torch.backends.cudnn.allow_tf32 = True
|
| 86 |
|
| 87 |
# Cargar SAM 2.1 en modo Predictor (para cajas), no AutomaticMaskGenerator
|
| 88 |
checkpoint_path = download_sam_checkpoint()
|
|
|
|
| 107 |
outputs = gdino_model(**inputs)
|
| 108 |
|
| 109 |
# Extraer las cajas con un umbral de confianza
|
| 110 |
+
# Modificación para compatibilidad: filtramos manualmente en lugar de pasar el umbral a la función.
|
| 111 |
+
results = gdino_processor.post_process_grounded_object_detection(outputs, inputs.input_ids, target_sizes=[imagen.size[::-1]])[0]
|
| 112 |
+
|
| 113 |
+
# Filtrar los resultados basados en el umbral de la caja (box_threshold)
|
| 114 |
+
boxes_filt = []
|
| 115 |
+
scores_filt = []
|
| 116 |
+
labels_filt = []
|
| 117 |
+
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
|
| 118 |
+
if score > box_threshold:
|
| 119 |
+
boxes_filt.append(box)
|
| 120 |
+
scores_filt.append(score)
|
| 121 |
+
labels_filt.append(label)
|
| 122 |
+
|
| 123 |
+
# Convertir las listas filtradas de nuevo a tensores
|
| 124 |
+
cajas = torch.stack(boxes_filt) if boxes_filt else torch.empty((0, 4))
|
| 125 |
+
scores = torch.stack(scores_filt) if scores_filt else torch.empty((0,))
|
| 126 |
+
etiquetas = labels_filt
|
| 127 |
|
| 128 |
if len(cajas) == 0:
|
| 129 |
return imagen, f"No se encontró nada para '{texto}' con el umbral actual ({box_threshold}). Intenta bajarlo."
|
|
|
|
| 130 |
# 3. SAM 2.1: Segmentar dentro de las cajas encontradas
|
| 131 |
sam2_predictor.set_image(imagen_np)
|
| 132 |
|
|
|
|
| 141 |
)
|
| 142 |
|
| 143 |
# Las máscaras de SAM tienen forma (N, 1, H, W). Las aplanamos a (N, H, W)
|
| 144 |
+
# Solo hacemos squeeze si hay máscaras, para evitar errores con tensores vacíos.
|
| 145 |
+
if masks.ndim == 4 and masks.shape[1] == 1:
|
| 146 |
+
masks = masks.squeeze(1)
|
| 147 |
# 4. SUPERPONER MÁSCARAS
|
| 148 |
resultado_img = create_mask_overlay(imagen, masks)
|
| 149 |
|