GitHub Actions committed on
Commit
61bd74f
Β·
1 Parent(s): 4dcfe70

Deploy 50e0a6d

Browse files
Files changed (3) hide show
  1. Dockerfile +31 -0
  2. README.md +4 -6
  3. app.py +62 -0
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# infra/hf_spaces/embedder/Dockerfile
# Bakes BAAI/bge-small-en-v1.5 weights into the image at build time.
# Cold start: ~5s (weights load from disk, not downloaded). No startup surprises.

FROM python:3.11-slim

WORKDIR /app

# HF_HOME must be set before the RUN python cache step so the model
# is written to /app/model_cache, which becomes part of this layer.
ENV HF_HOME=/app/model_cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Each requirement specifier MUST be quoted: an unquoted ">=" is parsed
# by the shell as an output redirection (e.g. `fastapi>=0.115.0` would
# install the latest fastapi and redirect stdout to a file named
# `=0.115.0`), silently dropping every version pin. Quoting also stops
# the shell from glob-expanding the brackets in uvicorn[standard].
RUN pip install --no-cache-dir \
    "fastapi>=0.115.0" \
    "uvicorn[standard]>=0.29.0" \
    "sentence-transformers>=3.0.0" \
    "numpy>=1.26.0"

# Download and cache model weights during build — never at runtime.
# This is the key step that makes cold starts fast on HF Spaces.
RUN python -c "\
from sentence_transformers import SentenceTransformer; \
SentenceTransformer('BAAI/bge-small-en-v1.5', cache_folder='/app/model_cache')"

COPY app.py .

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- title: Personabot Embedder
3
- emoji: πŸ‘€
4
- colorFrom: indigo
5
- colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: personabot-embedder
3
+ emoji: πŸ”
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  ---
 
 
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # infra/hf_spaces/embedder/app.py
2
+ # Serves BAAI/bge-small-en-v1.5 embeddings over HTTP.
3
+ # Model is loaded from /app/model_cache (baked into the Docker image at build time).
4
+
5
+ from contextlib import asynccontextmanager
6
+ from typing import Any
7
+
8
+ import numpy as np
9
+ from fastapi import FastAPI
10
+ from pydantic import BaseModel
11
+ from sentence_transformers import SentenceTransformer
12
+
13
+
14
class EmbedRequest(BaseModel):
    """Request payload for POST /embed: the batch of strings to embed."""

    texts: list[str]
16
+
17
+
18
class EmbedResponse(BaseModel):
    """Response payload for POST /embed: one embedding per input text."""

    embeddings: list[list[float]]
20
+
21
+
22
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the embedding model at startup; release it on shutdown.

    Weights are read from /app/model_cache (baked into the Docker image
    at build time), so no network call happens here.
    """
    # BGE normalises embeddings by default; no manual L2 step needed.
    model = SentenceTransformer(
        "BAAI/bge-small-en-v1.5",
        cache_folder="/app/model_cache",
    )
    model.eval()
    app.state.model = model
    yield
    # Drop the reference on shutdown.
    app.state.model = None
33
+
34
+
35
# Application instance. The interactive docs and the OpenAPI schema are
# deliberately disabled (docs_url/redoc_url/openapi_url = None); only the
# /health and /embed endpoints are exposed.
app = FastAPI(
    title="PersonaBot Embedder",
    lifespan=lifespan,
    docs_url=None,
    redoc_url=None,
    openapi_url=None,
)
42
+
43
+
44
@app.get("/health")
async def health() -> dict[str, str]:
    """Liveness/readiness probe.

    Returns ``{"status": "ok"}`` once the model is loaded and
    ``{"status": "loading"}`` otherwise. Uses ``getattr`` because
    ``app.state`` has no ``model`` attribute at all before the lifespan
    startup has run — a bare ``app.state.model`` access would raise
    AttributeError (HTTP 500) in that window instead of reporting
    "loading".
    """
    if getattr(app.state, "model", None) is None:
        return {"status": "loading"}
    return {"status": "ok"}
49
+
50
+
51
@app.post("/embed", response_model=EmbedResponse)
def embed(request: EmbedRequest) -> EmbedResponse:
    """Embed a batch of texts with bge-small-en-v1.5 (L2-normalised).

    Declared as a plain ``def`` (not ``async def``) on purpose:
    ``model.encode()`` is CPU-bound and would block the event loop —
    including /health — for the entire forward pass if awaited on the
    loop. FastAPI runs sync endpoints in its threadpool, keeping the
    loop responsive. The HTTP interface is unchanged.
    """
    if not request.texts:
        # Short-circuit: nothing to encode.
        return EmbedResponse(embeddings=[])
    # encode with batch_size=32, returns numpy array shape (N, 384)
    vectors: np.ndarray = app.state.model.encode(
        request.texts,
        batch_size=32,
        normalize_embeddings=True,
        show_progress_bar=False,
    )
    return EmbedResponse(embeddings=vectors.tolist())