chipling committed on
Commit
fe8fc9b
·
verified ·
1 Parent(s): fb45461

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -1,22 +1,21 @@
1
  from fastapi import FastAPI, UploadFile, File
2
- from transformers import AutoProcessor, AutoModel
3
  from PIL import Image
4
  import torch
5
  import io
6
 
7
  app = FastAPI()
8
- model_id = "google/siglip2-base-patch16-224"
9
 
10
  # Check for GPU, but default to optimized CPU path
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
- dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
13
 
14
  # 1. Load with memory-efficient settings
15
- model = AutoModel.from_pretrained(
16
  model_id,
17
  torch_dtype=dtype,
18
- low_cpu_mem_usage=True,
19
- attn_implementation="sdpa" # Use Scaled Dot Product Attention
20
  ).to(device).eval()
21
 
22
  # 2. COMPILE THE MODEL (The huge speed boost)
@@ -26,22 +25,24 @@ try:
26
  except Exception:
27
  print("Torch compile not supported on this environment, skipping...")
28
 
29
- processor = AutoProcessor.from_pretrained(model_id)
30
 
31
  # 3. USE 'def' (Not 'async def') for CPU-heavy tasks
32
  # This allows FastAPI to run searches in parallel on different CPU cores
33
  @app.post("/embed-text")
34
  def embed_text(text: str):
35
- # GEMMA FIX: max_length=64 is required for SigLIP 2
36
  inputs = processor(
37
  text=[text],
38
- padding="max_length",
39
- max_length=64,
40
  return_tensors="pt"
41
  ).to(device)
42
 
43
  with torch.inference_mode(): # Faster than no_grad()
44
  outputs = model.get_text_features(**inputs)
 
 
45
 
46
  return {"vector": outputs[0].cpu().tolist()}
47
 
@@ -54,5 +55,7 @@ def embed_image(file: UploadFile = File(...)):
54
 
55
  with torch.inference_mode():
56
  outputs = model.get_image_features(**inputs)
 
 
57
 
58
- return {"vector": outputs[0].cpu().tolist()}
 
1
  from fastapi import FastAPI, UploadFile, File
2
+ from transformers import CLIPProcessor, CLIPModel
3
  from PIL import Image
4
  import torch
5
  import io
6
 
7
  app = FastAPI()
8
+ model_id = "openai/clip-vit-large-patch14"
9
 
10
  # Check for GPU, but default to optimized CPU path
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
+ dtype = torch.float16 if torch.cuda.is_available() else torch.float32
13
 
14
  # 1. Load with memory-efficient settings
15
+ model = CLIPModel.from_pretrained(
16
  model_id,
17
  torch_dtype=dtype,
18
+ low_cpu_mem_usage=True
 
19
  ).to(device).eval()
20
 
21
  # 2. COMPILE THE MODEL (The huge speed boost)
 
25
  except Exception:
26
  print("Torch compile not supported on this environment, skipping...")
27
 
28
+ processor = CLIPProcessor.from_pretrained(model_id)
29
 
30
  # 3. USE 'def' (Not 'async def') for CPU-heavy tasks
31
  # This allows FastAPI to run searches in parallel on different CPU cores
32
@app.post("/embed-text")
def embed_text(text: str):
    """Embed a text string with CLIP and return its unit-norm vector.

    Declared as a plain (sync) `def` so FastAPI dispatches it to the
    threadpool, letting CPU-heavy requests run in parallel.
    """
    # CLIP's tokenizer caps text at 77 tokens, so truncation is required
    # for arbitrary-length input.
    batch = processor(
        text=[text],
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    # inference_mode() is faster than no_grad() for pure inference.
    with torch.inference_mode():
        emb = model.get_text_features(**batch)

    # L2-normalize so downstream dot products equal cosine similarity.
    emb = emb / emb.norm(dim=-1, keepdim=True)

    return {"vector": emb[0].cpu().tolist()}
48
 
 
55
 
56
  with torch.inference_mode():
57
  outputs = model.get_image_features(**inputs)
58
+ # Normalize embeddings for cosine similarity
59
+ outputs = outputs / outputs.norm(dim=-1, keepdim=True)
60
 
61
+ return {"vector": outputs[0].cpu().tolist()}