Spaces:
Running
Running
GitHub Actions committed on
Commit ·
a99846a
1
Parent(s): e82be7a
Deploy d8ad462
Browse files
app.py
CHANGED
|
@@ -14,11 +14,17 @@ from sentence_transformers import SentenceTransformer
|
|
| 14 |
_MAX_TEXTS = 64
|
| 15 |
_MAX_TEXT_LEN = 2000
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
class EmbedRequest(BaseModel):
|
| 19 |
texts: list[Annotated[str, Field(max_length=_MAX_TEXT_LEN)]] = Field(
|
| 20 |
..., max_length=_MAX_TEXTS
|
| 21 |
)
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
class EmbedResponse(BaseModel):
|
|
@@ -58,9 +64,14 @@ async def health() -> dict[str, str]:
|
|
| 58 |
async def embed(request: EmbedRequest) -> EmbedResponse:
|
| 59 |
if not request.texts:
|
| 60 |
return EmbedResponse(embeddings=[])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# encode with batch_size=32, returns numpy array shape (N, 384)
|
| 62 |
vectors: Any = app.state.model.encode(
|
| 63 |
-
|
| 64 |
batch_size=32,
|
| 65 |
normalize_embeddings=True,
|
| 66 |
show_progress_bar=False,
|
|
|
|
| 14 |
_MAX_TEXTS = 64
|
| 15 |
_MAX_TEXT_LEN = 2000
|
| 16 |
|
| 17 |
+
# BGE model card specifies this prefix for query embeddings in asymmetric retrieval.
|
| 18 |
+
# Document embeddings must NOT use this prefix — only query-time calls set is_query=True.
|
| 19 |
+
# Paper shows 2-4% NDCG improvement over no-prefix symmetric mode.
|
| 20 |
+
_BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "
|
| 21 |
+
|
| 22 |
|
| 23 |
class EmbedRequest(BaseModel):
|
| 24 |
texts: list[Annotated[str, Field(max_length=_MAX_TEXT_LEN)]] = Field(
|
| 25 |
..., max_length=_MAX_TEXTS
|
| 26 |
)
|
| 27 |
+
is_query: bool = False # True → prepend BGE asymmetric query instruction
|
| 28 |
|
| 29 |
|
| 30 |
class EmbedResponse(BaseModel):
|
|
|
|
| 64 |
async def embed(request: EmbedRequest) -> EmbedResponse:
|
| 65 |
if not request.texts:
|
| 66 |
return EmbedResponse(embeddings=[])
|
| 67 |
+
texts = (
|
| 68 |
+
[_BGE_QUERY_PREFIX + t for t in request.texts]
|
| 69 |
+
if request.is_query
|
| 70 |
+
else request.texts
|
| 71 |
+
)
|
| 72 |
# encode with batch_size=32, returns numpy array shape (N, 384)
|
| 73 |
vectors: Any = app.state.model.encode(
|
| 74 |
+
texts,
|
| 75 |
batch_size=32,
|
| 76 |
normalize_embeddings=True,
|
| 77 |
show_progress_bar=False,
|