File size: 803 Bytes
568609d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c88f52d
568609d
 
 
 
 
 
dad38f2
c88f52d
568609d
 
 
dad38f2
568609d
 
 
c88f52d
568609d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from sentence_transformers import CrossEncoder
from fastapi import FastAPI
from pydantic import BaseModel

# FastAPI application instance; the @app.get / @app.post decorators in this
# file register their handlers on it.
app = FastAPI()

# Request body accepted by the POST "/" endpoint in this file.
# (Documented with comments rather than a docstring so the generated schema
# description is left untouched.)
class ChunkSimilarityRequest(BaseModel):
    # Text that each chunk is scored against.
    query: str
    # Candidate texts to be ranked; each one is paired with `query`.
    chunks: list[str]
    # Used as the slice bound on the ranked chunk list; defaults to 5.
    top_k: int = 5


# Cross-encoder used by keyword_similarity() to score (query, candidate)
# pairs. It is constructed once, when this module is imported, and the same
# instance is reused by every call.
model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')


def keyword_similarity(query: str, candidates: list[str]):
    """Score every candidate against *query* with the module-level cross-encoder.

    Each candidate is paired with the query as ``[query, candidate]`` and the
    whole batch is handed to ``model.predict`` in one call.

    Args:
        query: Text that each candidate is compared with.
        candidates: Texts to score. Any iterable of strings is accepted.

    Returns:
        The value returned by ``model.predict`` for the pairs (one score per
        candidate, in input order), or an empty list when there are no
        candidates.
    """
    pairs = [[query, candidate] for candidate in candidates]
    if not pairs:
        # NOTE(review): CrossEncoder.predict appears to inspect the first
        # element of its input in some sentence-transformers releases, which
        # fails on an empty batch. Nothing needs scoring anyway, so skip the
        # model call entirely -- confirm against the pinned library version.
        return []
    return model.predict(pairs)



@app.get("/")
async def health():
    # Health-check route: takes no input and always answers with the same
    # fixed JSON object.
    status_body = {"status": "success"}
    return status_body


@app.post("/")
async def cross_encoding_similarity(
    payload: ChunkSimilarityRequest
):
    # Rank payload.chunks by their cross-encoder score against payload.query
    # and return the chunk texts only (scores are dropped), highest score
    # first, limited to at most payload.top_k entries.
    #
    # NOTE(review): keyword_similarity() is a plain synchronous call made
    # inside this coroutine, so the event loop is blocked while the model
    # runs -- confirm that is acceptable for the expected load.

    # A negative top_k used as a slice bound would drop items from the END of
    # the ranking instead of limiting it, so clamp it to zero.
    limit = max(payload.top_k, 0)
    if limit == 0 or not payload.chunks:
        # Nothing can be returned; avoid invoking the model on an empty or
        # unwanted batch.
        return []

    scores = keyword_similarity(payload.query, payload.chunks)

    # sorted() is stable, so chunks with equal scores keep their input order.
    ranked = sorted(
        zip(payload.chunks, scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [chunk for chunk, _ in ranked[:limit]]