Spaces:
Runtime error
Runtime error
import os
import time
from typing import List

import numpy as np
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from sentence_transformers import SentenceTransformer
# HF CPU optimization: don't oversubscribe threads.
# Cap PyTorch at two intra-op and two inter-op threads.
torch.set_num_threads(2)
torch.set_num_interop_threads(2)
# Switch off the tokenizers library's own parallelism as well.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Model identifier passed to SentenceTransformer and echoed by health().
DEFAULT_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Application object for this service.
app = FastAPI(title="Embedding API")

# Process-wide model instance; remains None until startup() assigns it.
MODEL = None  # singleton
class BatchRequest(BaseModel):
    """Request body consumed by ``embed_batch``.

    Attributes:
        texts: Strings to embed. Declared required with ``min_length=1``.
        normalize: Forwarded to ``encode`` as ``normalize_embeddings``.
        batch_size: Forwarded to ``encode`` as ``batch_size``; constrained
            to the range 1..512, default 64.
    """

    # NOTE(review): ``min_length`` on a list field is the Pydantic v2
    # spelling (v1 used ``min_items``) - confirm the installed version.
    texts: List[str] = Field(..., min_length=1)
    normalize: bool = True
    batch_size: int = Field(64, ge=1, le=512)
# Registered as a startup hook so the model is loaded before requests are
# served; without registration nothing in this file ever assigns MODEL.
@app.on_event("startup")
async def startup():
    """Load the sentence-transformer model into the module-level ``MODEL``.

    The ``MODEL is None`` check makes repeated calls a no-op once the model
    has been assigned, so the model is constructed at most once per process.
    """
    global MODEL
    if MODEL is None:
        print(f"Loading model once: {DEFAULT_MODEL}")
        MODEL = SentenceTransformer(DEFAULT_MODEL, device="cpu")
        print("✅ Model loaded")
# NOTE(review): the route path is derived from the function name because no
# registration is visible in this file - confirm it against existing clients.
@app.get("/health")
async def health():
    """Return a static liveness payload naming the configured model.

    Returns:
        ``{"ok": True, "model": DEFAULT_MODEL}``. The payload does not
        reflect whether ``MODEL`` has actually been loaded.
    """
    return {"ok": True, "model": DEFAULT_MODEL}
# NOTE(review): the route path is derived from the function name because no
# registration is visible in this file - confirm it against existing clients.
@app.post("/embed_batch")
async def embed_batch(req: BatchRequest):
    """Embed ``req.texts`` with the shared model and return the vectors.

    Args:
        req: Validated request carrying the texts, the normalization flag
            and the encode batch size.

    Returns:
        A dict with ``dim`` (second axis of the embedding array), ``count``
        (first axis), ``ms`` (elapsed milliseconds covering encode plus the
        dtype cast) and ``embeddings`` (the array as nested lists).

    Raises:
        HTTPException: 503 when ``MODEL`` has not been loaded yet.
    """
    # Fail with an explicit 503 instead of an AttributeError on None.
    if MODEL is None:
        raise HTTPException(status_code=503, detail="Model is not loaded yet")

    t0 = time.perf_counter()
    # NOTE: encode() is a synchronous call made from inside this coroutine.
    emb = MODEL.encode(
        req.texts,
        batch_size=req.batch_size,
        normalize_embeddings=req.normalize,
        convert_to_numpy=True,
        show_progress_bar=False,
    )
    # Cast only when the array is not already float32.
    if emb.dtype != np.float32:
        emb = emb.astype(np.float32)
    ms = (time.perf_counter() - t0) * 1000.0

    return {
        "dim": int(emb.shape[1]),
        "count": int(emb.shape[0]),
        "ms": ms,
        "embeddings": emb.tolist(),
    }