Spaces:
Runtime error
Runtime error
Commit ·
ddb9445
0
Parent(s):
Init commit
Browse files- Dockerfile +28 -0
- README.md +57 -0
- app.py +60 -0
- requirements.txt +6 -0
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Embedder service image for Hugging Face Spaces (Docker SDK, CPU).
FROM python:3.11-slim

# Don't write .pyc files; flush stdout/stderr immediately (better Space logs).
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# build-essential/git: lets pip build any packages that ship no wheel;
# libglib2.0-0/libgl1: common native deps of CV extras.
# NOTE(review): possibly unnecessary for a text-only model — confirm before trimming.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential git libglib2.0-0 libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Run as the non-root uid-1000 user that HF Spaces expects.
RUN useradd -m -u 1000 user
USER user
# User-level pip installs land in ~/.local/bin; make them callable.
ENV PATH="/home/user/.local/bin:$PATH"
WORKDIR /app

# Install dependencies first so this layer caches across app.py edits.
COPY requirements.txt ./
RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt

COPY app.py ./

# Point model caches at the user's writable home directory.
ENV HF_HOME="/home/user/.cache/huggingface"
ENV SENTENCE_TRANSFORMERS_HOME="/home/user/.cache/huggingface/sentence-transformers"

# Spaces routes external traffic to port 7860.
ENV PORT=7860
EXPOSE 7860

# Single worker: the app serializes requests with a semaphore anyway.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
|
| 27 |
+
|
| 28 |
+
|
README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Embedding
|
| 3 |
+
emoji: 🐠
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
short_description: Simple API to run sentence-transformers/all-MiniLM-L6-v2
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Embedder Service (HuggingFace Space)
|
| 12 |
+
|
| 13 |
+
A lightweight microservice exposing sentence-transformers embeddings over HTTP.
|
| 14 |
+
|
| 15 |
+
- Model: `sentence-transformers/all-MiniLM-L6-v2`
|
| 16 |
+
- Sequential queueing: handles one request at a time to avoid resource spikes.
|
| 17 |
+
|
| 18 |
+
## Endpoints
|
| 19 |
+
|
| 20 |
+
- `GET /health` → `{ ok: true, model: string, loaded: boolean }`
|
| 21 |
+
- `POST /embed`
|
| 22 |
+
- Request:
|
| 23 |
+
|
| 24 |
+
```
|
| 25 |
+
{
|
| 26 |
+
"texts": ["hello world", "another document"]
|
| 27 |
+
}
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
- Response:
|
| 31 |
+
|
| 32 |
+
```
|
| 33 |
+
{
|
| 34 |
+
"vectors": [[0.01, -0.02, ...], [0.03, -0.01, ...]],
|
| 35 |
+
"model": "sentence-transformers/all-MiniLM-L6-v2"
|
| 36 |
+
}
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
## Deploy on HF Spaces
|
| 40 |
+
|
| 41 |
+
1. Create a new Space (Docker type)
|
| 42 |
+
2. Upload `app.py`, `Dockerfile`, `requirements.txt`
|
| 43 |
+
3. Set Space hardware to CPU (Small is fine)
|
| 44 |
+
4. Space will run on port 7860 by default
|
| 45 |
+
|
| 46 |
+
## Example cURL
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
curl -s -X POST https://binkhoale1812-embedding.hf.space/embed \
|
| 50 |
+
-H 'Content-Type: application/json' \
|
| 51 |
+
-d '{"texts": ["An embedding request", "Second input"]}' | jq .
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
## Notes
|
| 55 |
+
|
| 56 |
+
- The service lazily loads the model on first request.
|
| 57 |
+
- If concurrent clients hit it, requests are serialized by a semaphore to reduce memory and CPU spikes.
|
app.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import asyncio
|
| 3 |
+
from typing import List
|
| 4 |
+
|
| 5 |
+
from fastapi import FastAPI
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Embedding model id; override with the EMBED_MODEL env var.
MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

app = FastAPI(title="Embedder Service", version="1.0.0")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class EmbedRequest(BaseModel):
    """Request body for POST /embed."""

    # Batch of input strings to embed (one output vector per string).
    texts: List[str]
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class EmbedResponse(BaseModel):
    """Response body for POST /embed."""

    # One embedding per input text, in the same order as the request.
    vectors: List[List[float]]
    # Name of the model that produced the vectors.
    model: str
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Lazily-initialized SentenceTransformer instance (None until first /embed).
_model = None
# Guards the one-time model initialization against concurrent loads.
_model_lock = asyncio.Lock()
_sequential_gate = asyncio.Semaphore(1)  # ensure one job at a time
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _lazy_load_model():
    """Instantiate the SentenceTransformer on first use.

    The import is deferred so container startup stays light. Callers are
    expected to hold ``_model_lock`` to avoid racing initializations.
    """
    global _model
    if _model is not None:
        return
    # Lazy import keeps cold start fast; the heavy load happens here once.
    from sentence_transformers import SentenceTransformer

    _model = SentenceTransformer(MODEL_NAME)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@app.get("/health")
async def health():
    """Liveness probe: reports the model name and whether it is loaded yet."""
    is_loaded = _model is not None
    return {"ok": True, "model": MODEL_NAME, "loaded": is_loaded}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@app.post("/embed", response_model=EmbedResponse)
async def embed(req: EmbedRequest):
    """Embed a batch of texts with the sentence-transformers model.

    Requests are fully serialized by ``_sequential_gate`` to cap memory and
    CPU spikes on small Space hardware. Embeddings are L2-normalized.
    """
    # Simple sequential queueing: only one request processes at a time.
    async with _sequential_gate:
        # Protect model initialization under a lock to avoid concurrent loads.
        async with _model_lock:
            _lazy_load_model()
        # Empty batch: nothing to encode, skip the worker-thread round trip.
        if not req.texts:
            return EmbedResponse(vectors=[], model=MODEL_NAME)
        # encode() is synchronous; run it in a worker thread so the event loop
        # stays responsive. asyncio.to_thread (3.9+) replaces the deprecated
        # get_event_loop()/run_in_executor pattern used previously.
        vectors = await asyncio.to_thread(
            lambda: _model.encode(
                req.texts, show_progress_bar=False, normalize_embeddings=True
            ).tolist()
        )
    return EmbedResponse(vectors=vectors, model=MODEL_NAME)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
if __name__ == "__main__":
    # Local development entry point; on Spaces the Dockerfile CMD runs uvicorn.
    import uvicorn

    listen_port = int(os.environ.get("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
| 59 |
+
|
| 60 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pinned runtime dependencies for the embedder service.
fastapi==0.114.2
uvicorn[standard]==0.30.6  # [standard] extra adds uvloop/httptools for faster serving
sentence-transformers==3.1.1
torch==2.2.2  # CPU inference backend used by sentence-transformers
numpy==1.26.4
|
| 6 |
+
|