Update server1.py
Browse files — server1.py (+34 lines, −24 lines)
server1.py
CHANGED
|
@@ -8,15 +8,24 @@ from fastapi import FastAPI, File, UploadFile, Request
|
|
| 8 |
from fastapi.responses import JSONResponse
|
| 9 |
from PIL import Image, UnidentifiedImageError, ImageFile
|
| 10 |
from torchvision import transforms as T
|
| 11 |
-
import open_clip
|
| 12 |
|
| 13 |
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 14 |
|
| 15 |
-
# caches (
|
| 16 |
-
CACHE_ROOT = os.environ.get("
|
| 17 |
-
os.
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
torch.set_num_threads(1)
|
| 21 |
os.environ["OMP_NUM_THREADS"] = "1"
|
| 22 |
os.environ["MKL_NUM_THREADS"] = "1"
|
|
@@ -26,7 +35,7 @@ DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
|
|
| 26 |
if DEVICE == "cuda":
|
| 27 |
torch.set_float32_matmul_precision("high")
|
| 28 |
|
| 29 |
-
# rutas a embeddings (
|
| 30 |
MODEL_EMB_PATH = os.getenv("MODEL_EMB_PATH", "text_embeddings_modelos_bigg.pt")
|
| 31 |
VERS_EMB_PATH = os.getenv("VERS_EMB_PATH", "text_embeddings_bigg.pt")
|
| 32 |
|
|
@@ -35,23 +44,22 @@ PRETRAINED = "laion2b_s39b_b160k"
|
|
| 35 |
|
| 36 |
app = FastAPI(title="OpenCLIP bigG Vehicle API")
|
| 37 |
|
| 38 |
-
#
|
| 39 |
clip_model, _, preprocess = open_clip.create_model_and_transforms(MODEL_NAME, pretrained=PRETRAINED)
|
| 40 |
clip_model = clip_model.to(device=DEVICE, dtype=DTYPE).eval()
|
| 41 |
for p in clip_model.parameters():
|
| 42 |
p.requires_grad = False
|
| 43 |
|
| 44 |
-
# tomar mean/std del preprocess y hacer letterbox propio
|
| 45 |
normalize = next(t for t in preprocess.transforms if isinstance(t, T.Normalize))
|
| 46 |
-
SIZE =
|
| 47 |
if isinstance(SIZE, (tuple, list)):
|
| 48 |
-
SIZE = max(SIZE)
|
| 49 |
if SIZE is None:
|
| 50 |
-
SIZE = 448 # valor razonable
|
| 51 |
|
| 52 |
transform = T.Compose([T.ToTensor(), T.Normalize(mean=normalize.mean, std=normalize.std)])
|
| 53 |
|
| 54 |
-
#
|
| 55 |
def resize_letterbox(img: Image.Image, size: int) -> Image.Image:
|
| 56 |
if img.mode != "RGB":
|
| 57 |
img = img.convert("RGB")
|
|
@@ -59,13 +67,13 @@ def resize_letterbox(img: Image.Image, size: int) -> Image.Image:
|
|
| 59 |
if w == 0 or h == 0:
|
| 60 |
raise UnidentifiedImageError("imagen invalida")
|
| 61 |
scale = size / max(w, h)
|
| 62 |
-
nw, nh = max(1, int(w
|
| 63 |
img_resized = img.resize((nw, nh), Image.BICUBIC)
|
| 64 |
canvas = Image.new("RGB", (size, size), (0, 0, 0))
|
| 65 |
-
canvas.paste(img_resized, ((size
|
| 66 |
return canvas
|
| 67 |
|
| 68 |
-
#
|
| 69 |
def _ensure_label_list(x):
|
| 70 |
if isinstance(x, (list, tuple)):
|
| 71 |
return list(x)
|
|
@@ -83,17 +91,17 @@ def _load_embeddings(path: str):
|
|
| 83 |
model_labels, model_embeddings = _load_embeddings(MODEL_EMB_PATH)
|
| 84 |
version_labels, version_embeddings = _load_embeddings(VERS_EMB_PATH)
|
| 85 |
|
| 86 |
-
# comprobar dimension
|
| 87 |
with torch.inference_mode():
|
| 88 |
dummy = torch.zeros(1, 3, SIZE, SIZE, device=DEVICE, dtype=DTYPE)
|
| 89 |
-
img_dim =
|
| 90 |
if model_embeddings.shape[1] != img_dim or version_embeddings.shape[1] != img_dim:
|
| 91 |
raise RuntimeError(
|
| 92 |
f"dimension mismatch: image={img_dim}, modelos={model_embeddings.shape[1]}, "
|
| 93 |
f"versiones={version_embeddings.shape[1]}. Recalcula embeddings con {MODEL_NAME}/{PRETRAINED}."
|
| 94 |
)
|
| 95 |
|
| 96 |
-
#
|
| 97 |
@torch.inference_mode()
|
| 98 |
def _encode_pil(img: Image.Image) -> torch.Tensor:
|
| 99 |
img = resize_letterbox(img, SIZE)
|
|
@@ -107,7 +115,7 @@ def _topk_cosine(text_feats: torch.Tensor, text_labels: List[str], img_feat: tor
|
|
| 107 |
sim = (img_feat.float() @ text_feats.to(img_feat.device).float().T)[0]
|
| 108 |
vals, idxs = torch.topk(sim, k=k)
|
| 109 |
conf = torch.softmax(vals, dim=0)
|
| 110 |
-
return [{"label": text_labels[int(i)], "confidence": round(float(c)
|
| 111 |
|
| 112 |
def process_image_bytes(front_bytes: bytes, back_bytes: Optional[bytes] = None):
|
| 113 |
if not front_bytes or len(front_bytes) < 128:
|
|
@@ -127,7 +135,7 @@ def process_image_bytes(front_bytes: bytes, back_bytes: Optional[bytes] = None):
|
|
| 127 |
else:
|
| 128 |
img_feat = feat_front
|
| 129 |
|
| 130 |
-
# paso 1:
|
| 131 |
top_model = _topk_cosine(model_embeddings, model_labels, img_feat, k=1)[0]
|
| 132 |
modelo_full = top_model["label"]
|
| 133 |
|
|
@@ -135,7 +143,7 @@ def process_image_bytes(front_bytes: bytes, back_bytes: Optional[bytes] = None):
|
|
| 135 |
marca = partes[0] if len(partes) >= 1 else ""
|
| 136 |
modelo = partes[1] if len(partes) == 2 else ""
|
| 137 |
|
| 138 |
-
# paso 2: versiones
|
| 139 |
matches = [(lab, idx) for idx, lab in enumerate(version_labels) if lab.startswith(modelo_full)]
|
| 140 |
if not matches:
|
| 141 |
return {"brand": marca.upper(), "model": modelo.title(), "version": ""}
|
|
@@ -144,7 +152,7 @@ def process_image_bytes(front_bytes: bytes, back_bytes: Optional[bytes] = None):
|
|
| 144 |
labels_sub = [lab for lab, _ in matches]
|
| 145 |
embeds_sub = version_embeddings[idxs]
|
| 146 |
|
| 147 |
-
# paso 3:
|
| 148 |
top_ver = _topk_cosine(embeds_sub, labels_sub, img_feat, k=1)[0]
|
| 149 |
raw = top_ver["label"]
|
| 150 |
prefix = modelo_full + " "
|
|
@@ -155,7 +163,9 @@ def process_image_bytes(front_bytes: bytes, back_bytes: Optional[bytes] = None):
|
|
| 155 |
|
| 156 |
return {"brand": marca.upper(), "model": modelo.title(), "version": ver.title() if ver else ""}
|
| 157 |
|
| 158 |
-
#
|
|
|
|
|
|
|
| 159 |
@app.get("/")
|
| 160 |
def root():
|
| 161 |
return {"status": "ok", "device": DEVICE, "model": f"{MODEL_NAME}/{PRETRAINED}", "img_dim": int(model_embeddings.shape[1])}
|
|
|
|
| 8 |
from fastapi.responses import JSONResponse
|
| 9 |
from PIL import Image, UnidentifiedImageError, ImageFile
|
| 10 |
from torchvision import transforms as T
|
|
|
|
| 11 |
|
| 12 |
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 13 |
|
| 14 |
+
# ===== caches (use an app-owned path that is writable at runtime) =====
CACHE_ROOT = os.environ.get("APP_CACHE", "/tmp/appcache")
_hf_cache = os.path.join(CACHE_ROOT, "hf")
_clip_cache = os.path.join(CACHE_ROOT, "open_clip")
_torch_cache = os.path.join(CACHE_ROOT, "torch")
# Point every downloader (HF hub, transformers, open_clip, torch hub) at our cache root.
os.environ["XDG_CACHE_HOME"] = CACHE_ROOT
os.environ["HF_HOME"] = _hf_cache
os.environ["HUGGINGFACE_HUB_CACHE"] = _hf_cache
os.environ["TRANSFORMERS_CACHE"] = _hf_cache
os.environ["OPENCLIP_CACHE_DIR"] = _clip_cache
os.environ["TORCH_HOME"] = _torch_cache
for _cache_dir in (_hf_cache, _clip_cache, _torch_cache):
    os.makedirs(_cache_dir, exist_ok=True)

import open_clip  # deliberately imported only AFTER the cache env vars are set

# ===== basic limits: pin BLAS/torch to a single thread =====
torch.set_num_threads(1)
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
|
|
|
|
| 35 |
if DEVICE == "cuda":
|
| 36 |
torch.set_float32_matmul_precision("high")
|
| 37 |
|
| 38 |
+
# ===== paths to the precomputed text-embedding files (bigG) =====
MODEL_EMB_PATH = os.environ.get("MODEL_EMB_PATH", "text_embeddings_modelos_bigg.pt")
VERS_EMB_PATH = os.environ.get("VERS_EMB_PATH", "text_embeddings_bigg.pt")
|
| 41 |
|
|
|
|
| 44 |
|
| 45 |
app = FastAPI(title="OpenCLIP bigG Vehicle API")
|
| 46 |
|
| 47 |
+
# ===== model / preprocess =====
clip_model, _, preprocess = open_clip.create_model_and_transforms(MODEL_NAME, pretrained=PRETRAINED)
clip_model = clip_model.to(device=DEVICE, dtype=DTYPE).eval()
# Freeze every weight — this process only ever runs inference.
clip_model.requires_grad_(False)
|
| 52 |
|
|
|
|
| 53 |
# Reuse mean/std from the pretrained preprocess pipeline; the letterbox resize is our own.
normalize = next(t for t in preprocess.transforms if isinstance(t, T.Normalize))

# Infer the model's expected input side length from the preprocess transforms.
_raw_size = next((getattr(t, "size", None) for t in preprocess.transforms if hasattr(t, "size")), None)
if isinstance(_raw_size, (tuple, list)):
    _raw_size = max(_raw_size)  # may arrive as (H, W)
SIZE = 448 if _raw_size is None else _raw_size  # 448 is a reasonable fallback

transform = T.Compose([T.ToTensor(), T.Normalize(mean=normalize.mean, std=normalize.std)])
|
| 61 |
|
| 62 |
+
# ===== image utils =====
def resize_letterbox(img: Image.Image, size: int) -> Image.Image:
    """Fit *img* inside a size x size square, keeping aspect ratio, and
    center it on a black canvas (letterbox padding).

    Raises UnidentifiedImageError for degenerate zero-sized images.
    """
    if img.mode != "RGB":
        img = img.convert("RGB")
    # NOTE(review): this assignment is missing from the rendered diff (dropped at a
    # hunk boundary) but w/h are used below — restored here; confirm against the repo.
    w, h = img.size
    if w == 0 or h == 0:
        raise UnidentifiedImageError("imagen invalida")
    scale = size / max(w, h)
    # Clamp to >= 1 px so extreme aspect ratios never produce a zero dimension.
    nw, nh = max(1, int(w*scale)), max(1, int(h*scale))
    img_resized = img.resize((nw, nh), Image.BICUBIC)
    canvas = Image.new("RGB", (size, size), (0, 0, 0))
    canvas.paste(img_resized, ((size-nw)//2, (size-nh)//2))
    return canvas
|
| 75 |
|
| 76 |
+
# ===== cargar embeddings =====
|
| 77 |
def _ensure_label_list(x):
|
| 78 |
if isinstance(x, (list, tuple)):
|
| 79 |
return list(x)
|
|
|
|
| 91 |
model_labels, model_embeddings = _load_embeddings(MODEL_EMB_PATH)
|
| 92 |
version_labels, version_embeddings = _load_embeddings(VERS_EMB_PATH)
|
| 93 |
|
| 94 |
+
# Sanity check: the image-embedding dimension must match the stored text embeddings
# (1280 for the bigG variant).
with torch.inference_mode():
    dummy = torch.zeros(1, 3, SIZE, SIZE, device=DEVICE, dtype=DTYPE)
    img_dim = clip_model.encode_image(dummy).shape[-1]
_dims_ok = model_embeddings.shape[1] == img_dim and version_embeddings.shape[1] == img_dim
if not _dims_ok:
    raise RuntimeError(
        f"dimension mismatch: image={img_dim}, modelos={model_embeddings.shape[1]}, "
        f"versiones={version_embeddings.shape[1]}. Recalcula embeddings con {MODEL_NAME}/{PRETRAINED}."
    )
|
| 103 |
|
| 104 |
+
# ===== inferencia =====
|
| 105 |
@torch.inference_mode()
|
| 106 |
def _encode_pil(img: Image.Image) -> torch.Tensor:
|
| 107 |
img = resize_letterbox(img, SIZE)
|
|
|
|
| 115 |
sim = (img_feat.float() @ text_feats.to(img_feat.device).float().T)[0]
|
| 116 |
vals, idxs = torch.topk(sim, k=k)
|
| 117 |
conf = torch.softmax(vals, dim=0)
|
| 118 |
+
return [{"label": text_labels[int(i)], "confidence": round(float(c)*100.0, 2)} for i, c in zip(idxs, conf)]
|
| 119 |
|
| 120 |
def process_image_bytes(front_bytes: bytes, back_bytes: Optional[bytes] = None):
|
| 121 |
if not front_bytes or len(front_bytes) < 128:
|
|
|
|
| 135 |
else:
|
| 136 |
img_feat = feat_front
|
| 137 |
|
| 138 |
+
# paso 1: modelo
|
| 139 |
top_model = _topk_cosine(model_embeddings, model_labels, img_feat, k=1)[0]
|
| 140 |
modelo_full = top_model["label"]
|
| 141 |
|
|
|
|
| 143 |
marca = partes[0] if len(partes) >= 1 else ""
|
| 144 |
modelo = partes[1] if len(partes) == 2 else ""
|
| 145 |
|
| 146 |
+
# paso 2: versiones por prefijo
|
| 147 |
matches = [(lab, idx) for idx, lab in enumerate(version_labels) if lab.startswith(modelo_full)]
|
| 148 |
if not matches:
|
| 149 |
return {"brand": marca.upper(), "model": modelo.title(), "version": ""}
|
|
|
|
| 152 |
labels_sub = [lab for lab, _ in matches]
|
| 153 |
embeds_sub = version_embeddings[idxs]
|
| 154 |
|
| 155 |
+
# paso 3: version
|
| 156 |
top_ver = _topk_cosine(embeds_sub, labels_sub, img_feat, k=1)[0]
|
| 157 |
raw = top_ver["label"]
|
| 158 |
prefix = modelo_full + " "
|
|
|
|
| 163 |
|
| 164 |
return {"brand": marca.upper(), "model": modelo.title(), "version": ver.title() if ver else ""}
|
| 165 |
|
| 166 |
+
# ===== endpoints =====
# NOTE: `app` is already instantiated once near the top of the module (line 45 of the
# new file); the duplicate `app = FastAPI(...)` that this commit added here would have
# discarded that instance, so it has been removed.

@app.get("/")
def root():
    """Health/info endpoint: reports device, model identifier and embedding dim."""
    return {"status": "ok", "device": DEVICE, "model": f"{MODEL_NAME}/{PRETRAINED}", "img_dim": int(model_embeddings.shape[1])}
|