Spaces:

1337XCode
/

personabot-reranker

Running

App Files Files Community

GitHub Actions committed on Feb 27

Commit

3fb0d78

1 Parent(s): 3885315

Deploy 50e0a6d

Browse files

Files changed (3) hide show

Dockerfile +26 -0
README.md +3 -5
app.py +65 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,26 @@

+# infra/hf_spaces/reranker/Dockerfile
+# Bakes cross-encoder/ms-marco-MiniLM-L-6-v2 weights into the image at build time.
+FROM python:3.11-slim
+WORKDIR /app
+# HF_HOME is set to the same directory that is passed as cache_folder below.
+ENV HF_HOME=/app/model_cache \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+# Each requirement is quoted on purpose: RUN here is the shell form, so an
+# unquoted ">=0.115.0" is parsed by /bin/sh as an output redirection to a file
+# named "=0.115.0" and the version constraint never reaches pip.
+RUN pip install --no-cache-dir \
+    "fastapi>=0.115.0" \
+    "uvicorn[standard]>=0.29.0" \
+    "sentence-transformers>=3.0.0"
+# Bake model weights into this Docker layer.
+RUN python -c "\
+from sentence_transformers import CrossEncoder; \
+CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2', cache_folder='/app/model_cache')"
+COPY app.py .
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

README.md CHANGED Viewed

@@ -1,10 +1,8 @@
 ---
-title: Personabot Reranker
-emoji: 🌍
 colorFrom: purple
-colorTo: purple
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: personabot-reranker
+emoji: 🎯
 colorFrom: purple
+colorTo: pink
 sdk: docker
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+# infra/hf_spaces/reranker/app.py
+# Serves cross-encoder/ms-marco-MiniLM-L-6-v2 reranking over HTTP.
+# Model is loaded from /app/model_cache (baked into the Docker image at build time).
+from contextlib import asynccontextmanager
+from fastapi import FastAPI
+from pydantic import BaseModel
+from sentence_transformers import CrossEncoder
+class RerankRequest(BaseModel):  # Request body accepted by POST /rerank.
+    query: str  # text that every candidate in `texts` is paired with for scoring
+    texts: list[str]  # candidates to rank; an empty list yields an empty response
+    top_k: int = 5  # bound on how many results are returned; applied as a slice after sorting
+class RerankResponse(BaseModel):  # Response body of POST /rerank; the two lists are parallel.
+    # Indices into the input texts list, sorted by descending relevance.
+    indices: list[int]
+    scores: list[float]  # scores[j] is the model score of texts[indices[j]]
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    app.state.model = CrossEncoder(
+        "cross-encoder/ms-marco-MiniLM-L-6-v2",
+        cache_folder="/app/model_cache",
+    )
+    yield
+    app.state.model = None
+app = FastAPI(  # Application instance that the route decorators below attach to.
+    title="PersonaBot Reranker",
+    lifespan=lifespan,  # loads the CrossEncoder on startup and clears it on shutdown
+    docs_url=None,  # per FastAPI docs, None disables the Swagger UI route
+    redoc_url=None,  # per FastAPI docs, None disables the ReDoc route
+    openapi_url=None,  # per FastAPI docs, None disables the OpenAPI schema route
+)
+@app.get("/health")
+async def health() -> dict[str, str]:
+    if app.state.model is None:
+        return {"status": "loading"}
+    return {"status": "ok"}
+@app.post("/rerank", response_model=RerankResponse)
+async def rerank(request: RerankRequest) -> RerankResponse:
+    if not request.texts:
+        return RerankResponse(indices=[], scores=[])
+    pairs = [(request.query, text) for text in request.texts]
+    raw_scores: list[float] = [float(s) for s in app.state.model.predict(pairs)]
+    # Sort by score descending, return top_k
+    ranked = sorted(enumerate(raw_scores), key=lambda x: x[1], reverse=True)
+    ranked = ranked[: request.top_k]
+    return RerankResponse(
+        indices=[i for i, _ in ranked],
+        scores=[s for _, s in ranked],
+    )