GitHub Actions committed on
Commit
3fb0d78
·
1 Parent(s): 3885315

Deploy 50e0a6d

Browse files
Files changed (3) hide show
  1. Dockerfile +26 -0
  2. README.md +3 -5
  3. app.py +65 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# infra/hf_spaces/reranker/Dockerfile
# Bakes cross-encoder/ms-marco-MiniLM-L-6-v2 weights into the image at build time.

FROM python:3.11-slim

WORKDIR /app

# HF_HOME points the Hugging Face cache at the directory the weights are baked into.
ENV HF_HOME=/app/model_cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# The requirement specifiers are quoted on purpose: this RUN uses the shell
# form, where an unquoted ">=0.115.0" is parsed as an output redirection to a
# file named "=0.115.0" and the version constraint is silently dropped.
RUN pip install --no-cache-dir \
    "fastapi>=0.115.0" \
    "uvicorn[standard]>=0.29.0" \
    "sentence-transformers>=3.0.0"

# Bake model weights into this Docker layer.
# The continuation lines below are intentionally not indented so the joined
# "python -c" program does not start with leading whitespace.
# NOTE(review): confirm the cache_folder keyword is accepted by CrossEncoder in
# the minimum sentence-transformers version allowed above.
RUN python -c "\
from sentence_transformers import CrossEncoder; \
CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2', cache_folder='/app/model_cache')"

COPY app.py .

EXPOSE 7860

# Single worker: each worker process would load its own copy of the model.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- title: Personabot Reranker
3
- emoji: 🌍
4
  colorFrom: purple
5
- colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: personabot-reranker
3
+ emoji: 🎯
4
  colorFrom: purple
5
+ colorTo: pink
6
  sdk: docker
7
  pinned: false
8
  ---
 
 
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # infra/hf_spaces/reranker/app.py
2
+ # Serves cross-encoder/ms-marco-MiniLM-L-6-v2 reranking over HTTP.
3
+ # Model is loaded from /app/model_cache (baked into the Docker image at build time).
4
+
5
+ from contextlib import asynccontextmanager
6
+
7
+ from fastapi import FastAPI
8
+ from pydantic import BaseModel
9
+ from sentence_transformers import CrossEncoder
10
+
11
+
12
# Request body for POST /rerank.
class RerankRequest(BaseModel):
    # Query that every candidate text is scored against.
    query: str
    # Candidate texts; response indices refer to positions in this list.
    texts: list[str]
    # Maximum number of ranked results to return (defaults to 5).
    top_k: int = 5
16
+
17
+
18
# Response body for POST /rerank; both lists are parallel and equally long.
class RerankResponse(BaseModel):
    # Positions in the request's texts list, best match first.
    indices: list[int]
    # Model score for each returned index, in the same order.
    scores: list[float]
22
+
23
+
24
@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: build the cross-encoder once and keep it on app.state so the
    # request handlers can reuse it.
    cross_encoder = CrossEncoder(
        "cross-encoder/ms-marco-MiniLM-L-6-v2",
        cache_folder="/app/model_cache",
    )
    app.state.model = cross_encoder

    yield

    # Shutdown: clear the slot so /health no longer reports the model as ready.
    app.state.model = None
32
+
33
+
34
# ASGI application object served by uvicorn as "app:app".
app = FastAPI(
    title="PersonaBot Reranker",
    # All three documentation/schema URLs are set to None, so this service
    # registers only its own routes.
    openapi_url=None,
    docs_url=None,
    redoc_url=None,
    # Loads the model on startup and clears it on shutdown.
    lifespan=lifespan,
)
41
+
42
+
43
@app.get("/health")
async def health() -> dict[str, str]:
    """Report whether the reranking model is available.

    Returns:
        ``{"status": "ok"}`` when a model is stored on ``app.state``,
        otherwise ``{"status": "loading"}``.
    """
    # getattr with a default: app.state has no `model` attribute until the
    # lifespan startup has assigned it, and plain attribute access would raise
    # AttributeError (a 500 response) instead of reporting "loading".
    if getattr(app.state, "model", None) is None:
        return {"status": "loading"}
    return {"status": "ok"}
48
+
49
+
50
@app.post("/rerank", response_model=RerankResponse)
async def rerank(request: RerankRequest) -> RerankResponse:
    """Score every candidate text against the query and return the best ones.

    Args:
        request: The query, the candidate texts, and the maximum number of
            results to return (``top_k``).

    Returns:
        Indices into ``request.texts`` and their scores, ordered by descending
        score and limited to ``top_k`` entries. Both lists are empty when
        there are no texts or ``top_k`` is zero or negative.
    """
    # Clamp at zero: a negative top_k would otherwise be used as a negative
    # slice bound and drop the lowest-ranked items instead of limiting the
    # number of results.
    limit = max(request.top_k, 0)
    if not request.texts or limit == 0:
        return RerankResponse(indices=[], scores=[])

    pairs = [(request.query, text) for text in request.texts]
    # Convert to plain floats so the scores serialize as JSON numbers.
    raw_scores: list[float] = [float(s) for s in app.state.model.predict(pairs)]

    # Sort by score descending, return top_k
    ranked = sorted(enumerate(raw_scores), key=lambda x: x[1], reverse=True)
    ranked = ranked[:limit]

    return RerankResponse(
        indices=[i for i, _ in ranked],
        scores=[s for _, s in ranked],
    )