chipling committed on
Commit
844d9d8
·
verified ·
1 Parent(s): 98537c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -4,49 +4,49 @@ from transformers import AutoProcessor, AutoModel
4
  from PIL import Image
5
  import torch
6
  import io
 
 
 
 
7
 
8
  app = FastAPI()
9
 
10
  model_id = "google/siglip2-so400m-patch14-384"
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
 
13
- # OPTIMIZATION 1: Load with half-precision if on GPU, or optimized CPU settings
 
14
  model = AutoModel.from_pretrained(
15
  model_id,
16
- torch_dtype=torch.float32, # Use float16 if you have a GPU
17
  low_cpu_mem_usage=True
18
  ).to(device).eval()
19
 
20
- # OPTIMIZATION 2: Compile the model (Requires PyTorch 2.0+)
 
 
 
21
  try:
22
  model = torch.compile(model)
23
- except Exception as e:
24
- print(f"Compilation skipped: {e}")
25
-
26
- processor = AutoProcessor.from_pretrained(model_id)
27
 
28
  class TextRequest(BaseModel):
29
  text: str
30
 
31
- # OPTIMIZATION 3: Remove 'async' so FastAPI uses thread pools for CPU work
32
  @app.post("/embed-text")
33
  def embed_text(request: TextRequest):
34
  inputs = processor(text=[request.text], padding="max_length", return_tensors="pt").to(device)
35
- with torch.no_grad():
36
- # OPTIMIZATION 4: Use Inference Mode (faster than no_grad)
37
- with torch.inference_mode():
38
- text_outputs = model.get_text_features(**inputs)
39
-
40
  return {"vector": text_outputs[0].cpu().tolist(), "dim": 1152}
41
 
42
  @app.post("/embed-image")
43
  def embed_image(file: UploadFile = File(...)):
44
- # Reading file is still async-friendly
45
  image_data = file.file.read()
46
  image = Image.open(io.BytesIO(image_data)).convert("RGB")
47
 
48
  inputs = processor(images=image, return_tensors="pt").to(device)
49
  with torch.inference_mode():
50
  image_outputs = model.get_image_features(**inputs)
51
-
52
  return {"vector": image_outputs[0].cpu().tolist(), "dim": 1152}
 
4
  from PIL import Image
5
  import torch
6
  import io
7
import os

# Raise the Hugging Face Hub read timeout (default 10s) so large checkpoint
# shards don't time out while downloading on slow connections.
os.environ["HF_HUB_READ_TIMEOUT"] = "60"

app = FastAPI()

model_id = "google/siglip2-so400m-patch14-384"
device = "cuda" if torch.cuda.is_available() else "cpu"

# low_cpu_mem_usage streams weights in instead of materializing two full
# copies at once, preventing RAM spikes on small-memory hosts.
model = AutoModel.from_pretrained(
    model_id,
    torch_dtype=torch.float32,  # NOTE(review): float16 would halve memory on GPU — confirm accuracy needs
    low_cpu_mem_usage=True,
).to(device).eval()

# use_fast=True selects the fast image processor and avoids the
# slow-processor warning emitted by recent transformers versions.
processor = AutoProcessor.from_pretrained(model_id, use_fast=True)

# Best-effort speedup: torch.compile (PyTorch 2.0+) is not supported on every
# platform, so failure is logged rather than fatal. A bare `except:` here
# would also swallow KeyboardInterrupt/SystemExit — catch Exception only.
try:
    model = torch.compile(model)
except Exception as e:
    print(f"torch.compile skipped: {e}")
 
 
33
 
34
  class TextRequest(BaseModel):
35
  text: str
36
 
 
37
@app.post("/embed-text")
def embed_text(request: TextRequest):
    """Embed the request text with the SigLIP text tower.

    Returns a JSON object with the embedding as a flat list of floats
    ("vector") and its dimensionality ("dim").
    """
    # padding="max_length" matches the fixed sequence length SigLIP expects.
    batch = processor(
        text=[request.text], padding="max_length", return_tensors="pt"
    ).to(device)
    # inference_mode disables autograd bookkeeping for a cheaper forward pass.
    with torch.inference_mode():
        features = model.get_text_features(**batch)
    vector = features[0].cpu().tolist()
    return {"vector": vector, "dim": 1152}
43
 
44
  @app.post("/embed-image")
45
  def embed_image(file: UploadFile = File(...)):
 
46
  image_data = file.file.read()
47
  image = Image.open(io.BytesIO(image_data)).convert("RGB")
48
 
49
  inputs = processor(images=image, return_tensors="pt").to(device)
50
  with torch.inference_mode():
51
  image_outputs = model.get_image_features(**inputs)
 
52
  return {"vector": image_outputs[0].cpu().tolist(), "dim": 1152}