Spaces:

1337XCode
/

personabot-embedder

Running

App Files Files Community

GitHub Actions committed on Feb 27

Commit

61bd74f

1 Parent(s): 4dcfe70

Deploy 50e0a6d

Browse files

Files changed (3) hide show

Dockerfile +31 -0
README.md +4 -6
app.py +62 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,31 @@

+# infra/hf_spaces/embedder/Dockerfile
+# Bakes BAAI/bge-small-en-v1.5 weights into the image at build time.
+# Cold start: ~5s (weights load from disk, not downloaded). No startup surprises.
+FROM python:3.11-slim
+WORKDIR /app
+# HF_HOME must be set before the RUN python cache step so the model
+# is written to /app/model_cache, which becomes part of this layer.
+ENV HF_HOME=/app/model_cache \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+# Requirement specifiers MUST be quoted. In a shell-form RUN an unquoted
+# `pkg>=1.0` is parsed by /bin/sh as `pkg` plus a redirect of stdout to a
+# file named `=1.0`, so the version floor is silently dropped (and
+# `[standard]` is open to glob expansion). Packages are sorted alphabetically.
+RUN pip install --no-cache-dir \
+    "fastapi>=0.115.0" \
+    "numpy>=1.26.0" \
+    "sentence-transformers>=3.0.0" \
+    "uvicorn[standard]>=0.29.0"
+# Run as an unprivileged user. UID 1000 matches the user ID that Hugging Face
+# Spaces uses for Docker containers. /app is still empty here, so the chown is
+# cheap; everything written below is created as this user.
+RUN useradd --create-home --uid 1000 user \
+    && mkdir -p /app/model_cache \
+    && chown -R user:user /app
+USER user
+# Download and cache model weights during build — never at runtime.
+# This is the key step that makes cold starts fast on HF Spaces.
+RUN python -c "\
+from sentence_transformers import SentenceTransformer; \
+SentenceTransformer('BAAI/bge-small-en-v1.5', cache_folder='/app/model_cache')"
+# Application code is copied last so edits to app.py do not invalidate the
+# dependency and model-weight layers above.
+COPY --chown=user:user app.py .
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

README.md CHANGED Viewed

@@ -1,10 +1,8 @@
 ---
-title: Personabot Embedder
-emoji: 👀
-colorFrom: indigo
-colorTo: purple
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: personabot-embedder
+emoji: 🔍
+colorFrom: blue
+colorTo: indigo
 sdk: docker
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,62 @@

+# infra/hf_spaces/embedder/app.py
+# Serves BAAI/bge-small-en-v1.5 embeddings over HTTP.
+# Model is loaded from /app/model_cache (baked into the Docker image at build time).
+from contextlib import asynccontextmanager
+from typing import Any
+import numpy as np
+from fastapi import FastAPI
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer
+# Request body for POST /embed.
+class EmbedRequest(BaseModel):
+    # Input strings to embed; an empty list is accepted and yields an empty result.
+    texts: list[str]
+# Response body for POST /embed.
+class EmbedResponse(BaseModel):
+    # One list of floats per input text, in the order produced by the model's encode call.
+    embeddings: list[list[float]]
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Load from baked-in cache path — no network call at startup.
+    # BGE normalises embeddings by default; no manual L2 step needed.
+    app.state.model = SentenceTransformer(
+        "BAAI/bge-small-en-v1.5",
+        cache_folder="/app/model_cache",
+    )
+    app.state.model.eval()
+    yield
+    app.state.model = None
+# ASGI application object; this is the `app` in the `app:app` target passed to uvicorn.
+app = FastAPI(
+    title="PersonaBot Embedder",
+    # Model loading/unloading is handled by the lifespan context manager.
+    lifespan=lifespan,
+    # Passing None disables the interactive docs, ReDoc and OpenAPI schema routes.
+    docs_url=None,
+    redoc_url=None,
+    openapi_url=None,
+)
+@app.get("/health")
+async def health() -> dict[str, str]:
+    if app.state.model is None:
+        return {"status": "loading"}
+    return {"status": "ok"}
+@app.post("/embed", response_model=EmbedResponse)
+async def embed(request: EmbedRequest) -> EmbedResponse:
+    if not request.texts:
+        return EmbedResponse(embeddings=[])
+    # encode with batch_size=32, returns numpy array shape (N, 384)
+    vectors: Any = app.state.model.encode(
+        request.texts,
+        batch_size=32,
+        normalize_embeddings=True,
+        show_progress_bar=False,
+    )
+    return EmbedResponse(embeddings=vectors.tolist())