johndoe321's picture
Update app.py
c582fc5 verified
import asyncio
import os
import time
from concurrent.futures import ThreadPoolExecutor
from typing import List

import numpy as np
import torch
from fastapi import FastAPI
from pydantic import BaseModel, Field
from sentence_transformers import SentenceTransformer
# HF CPU optimization: don't oversubscribe threads.
# Both PyTorch thread pools (intra-op and inter-op) are capped at two threads.
torch.set_num_threads(2)
torch.set_num_interop_threads(2)
# Switch off the tokenizers library's own parallelism on top of the caps above.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Model identifier loaded by the startup hook and echoed by GET /health.
DEFAULT_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# The ASGI application object that the route decorators below register on.
app = FastAPI(title="Embedding API")
# Process-wide model instance; remains None until the startup hook assigns it.
MODEL = None # singleton
class BatchRequest(BaseModel):
    # Request body accepted by POST /embed_batch.

    # Strings to embed; validation rejects an empty list.
    texts: List[str] = Field(..., min_length=1)

    # Passed to the encoder as ``normalize_embeddings``.
    normalize: bool = True

    # Encoder batch size: 64 when omitted, constrained to 1..512 inclusive.
    batch_size: int = Field(default=64, ge=1, le=512)
@app.on_event("startup")
async def startup():
    """Populate the module-level ``MODEL`` with the default encoder, once.

    Does nothing when ``MODEL`` has already been set. Otherwise it builds a
    CPU ``SentenceTransformer`` for ``DEFAULT_MODEL`` and prints a message
    before and after the load.
    """
    global MODEL
    if MODEL is not None:
        # Already loaded; keep the existing instance.
        return
    print(f"Loading model once: {DEFAULT_MODEL}")
    encoder = SentenceTransformer(DEFAULT_MODEL, device="cpu")
    MODEL = encoder
    print("✅ Model loaded")
@app.get("/health")
async def health():
    """Report service status together with the configured model name."""
    status = {"ok": True, "model": DEFAULT_MODEL}
    return status
# A single worker thread: encodes still run strictly one at a time, exactly as
# they did when they ran inline, but no longer on the event-loop thread.
_ENCODE_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="embed")


def _encode_payload(texts: List[str], batch_size: int, normalize: bool) -> dict:
    """Encode *texts* with ``MODEL`` and build the /embed_batch response body.

    Runs synchronously; intended to be called from a worker thread.

    Args:
        texts: Strings to embed.
        batch_size: Forwarded to ``MODEL.encode`` as ``batch_size``.
        normalize: Forwarded to ``MODEL.encode`` as ``normalize_embeddings``.

    Returns:
        A dict with ``dim`` (second axis of the embedding array), ``count``
        (first axis), ``ms`` (wall-clock milliseconds spent encoding and
        casting) and ``embeddings`` (the array as nested lists).
    """
    t0 = time.perf_counter()
    emb = MODEL.encode(
        texts,
        batch_size=batch_size,
        normalize_embeddings=normalize,
        convert_to_numpy=True,
        show_progress_bar=False,
    )
    # Ensure the payload is built from a float32 array.
    if emb.dtype != np.float32:
        emb = emb.astype(np.float32)
    # Stop the clock before list conversion so "ms" covers the same span as
    # before: encode plus cast, excluding time spent waiting for the worker.
    ms = (time.perf_counter() - t0) * 1000.0
    return {
        "dim": int(emb.shape[1]),
        "count": int(emb.shape[0]),
        "ms": ms,
        "embeddings": emb.tolist(),
    }


@app.post("/embed_batch")
async def embed_batch(req: BatchRequest):
    """Embed a batch of texts and return the vectors with timing metadata.

    ``MODEL.encode`` is a blocking, CPU-bound call. Awaiting it on the
    dedicated worker thread keeps the event loop free, so other requests
    (for example ``/health``) are still answered while an encode is running.

    Args:
        req: Validated request carrying ``texts``, ``normalize`` and
            ``batch_size``.

    Returns:
        The dict built by ``_encode_payload``: ``dim``, ``count``, ``ms`` and
        ``embeddings``.

    Raises:
        Any exception raised while encoding propagates to the caller
        unchanged through the awaited future.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        _ENCODE_EXECUTOR,
        _encode_payload,
        req.texts,
        req.batch_size,
        req.normalize,
    )