Aravindhan-Master committed on
Commit
568609d
·
1 Parent(s): 6bac01d

[feat]: initial commit

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. Dockerfile +13 -0
  3. main.py +36 -0
  4. requirements.txt +57 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: CPython 3.11.
FROM python:3.11

# Create an account with UID 1000 and a home directory, then run every
# following instruction (and the container itself) as that user, not root.
RUN useradd -m -u 1000 user
USER user
# Put the user's local bin directory first on PATH.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy only the dependency list first, then install from it.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the build context and serve main:app on all interfaces,
# port 7860.
COPY --chown=user . /app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import CrossEncoder
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+
5
# Application object; the route decorators further down register handlers on it.
app = FastAPI()
6
+
7
class ChunkSimilarityRequest(BaseModel):
    # Request body accepted by POST /keyword-similarity.
    # Documented with comments rather than a docstring so the schema generated
    # from this model stays exactly as it was.

    # Text that every chunk is scored against.
    query: str

    # Candidate texts to rank.
    chunks: list[str]

    # Upper bound on how many chunks are returned; 5 when omitted.
    top_k: int = 5
11
+
12
+
13
# Cross-encoder instantiated once at import time and reused by every call to
# cross_encoder_similarity.
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
14
+
15
+
16
def cross_encoder_similarity(query, candidates):
    """Score every candidate against the query with the module-level model.

    Args:
        query: The query text.
        candidates: Iterable of texts to score against ``query``.

    Returns:
        Whatever ``model.predict`` returns for the ``[query, candidate]``
        pairs (one score per candidate, in input order), or an empty list
        when there are no candidates.
    """
    pairs = [[query, candidate] for candidate in candidates]

    # Nothing to score: skip the model call instead of handing it an empty
    # batch.
    if not pairs:
        return []

    return model.predict(pairs)
20
+
21
+
22
+
23
@app.get("/keyword-similarity")
async def cross_encoding_similarity():
    # Fixed acknowledgement payload; the model is not involved on GET.
    # NOTE(review): the POST handler defined later reuses this function name,
    # so the module-level name ends up bound to that handler. Both routes are
    # still registered because the decorator runs at definition time.
    status_payload = {"status": "success"}
    return status_payload
26
+
27
+
28
@app.post("/keyword-similarity")
async def cross_encoding_similarity(
    payload: ChunkSimilarityRequest,
):
    """Return up to ``top_k`` chunks, ordered from highest to lowest score.

    An empty list is returned when no chunks are supplied or when ``top_k``
    is zero or negative.
    """
    # Guard before scoring. An empty chunk list leaves nothing to rank, and a
    # negative top_k used directly as a slice bound would drop the
    # lowest-ranked chunks instead of limiting the result size.
    if not payload.chunks or payload.top_k <= 0:
        return []

    # NOTE(review): this scoring call is synchronous, so it occupies the event
    # loop for its full duration inside this async handler. Consider
    # offloading it once the model/tokenizer is confirmed safe to call from
    # worker threads.
    scores = cross_encoder_similarity(payload.query, payload.chunks)

    # sorted() is stable, so chunks with equal scores keep their input order.
    ranked = sorted(
        zip(payload.chunks, scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [chunk for chunk, _ in ranked[:payload.top_k]]
requirements.txt ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.9.0
3
+ certifi==2025.4.26
4
+ charset-normalizer==3.4.2
5
+ click==8.2.1
6
+ fastapi==0.115.12
7
+ filelock==3.18.0
8
+ fsspec==2025.5.1
9
+ h11==0.16.0
10
+ hf-xet==1.1.3
11
+ huggingface-hub==0.32.4
12
+ idna==3.10
13
+ Jinja2==3.1.6
14
+ joblib==1.5.1
15
+ MarkupSafe==3.0.2
16
+ mpmath==1.3.0
17
+ networkx==3.5
18
+ numpy==2.2.6
19
+ nvidia-cublas-cu12==12.6.4.1
20
+ nvidia-cuda-cupti-cu12==12.6.80
21
+ nvidia-cuda-nvrtc-cu12==12.6.77
22
+ nvidia-cuda-runtime-cu12==12.6.77
23
+ nvidia-cudnn-cu12==9.5.1.17
24
+ nvidia-cufft-cu12==11.3.0.4
25
+ nvidia-cufile-cu12==1.11.1.6
26
+ nvidia-curand-cu12==10.3.7.77
27
+ nvidia-cusolver-cu12==11.7.1.2
28
+ nvidia-cusparse-cu12==12.5.4.2
29
+ nvidia-cusparselt-cu12==0.6.3
30
+ nvidia-nccl-cu12==2.26.2
31
+ nvidia-nvjitlink-cu12==12.6.85
32
+ nvidia-nvtx-cu12==12.6.77
33
+ packaging==25.0
34
+ pillow==11.2.1
35
+ pydantic==2.11.5
36
+ pydantic_core==2.33.2
37
+ PyYAML==6.0.2
38
+ regex==2024.11.6
39
+ requests==2.32.3
40
+ safetensors==0.5.3
41
+ scikit-learn==1.6.1
42
+ scipy==1.15.3
43
+ sentence-transformers==4.1.0
44
+ setuptools==80.9.0
45
+ sniffio==1.3.1
46
+ starlette==0.46.2
47
+ sympy==1.14.0
48
+ threadpoolctl==3.6.0
49
+ tokenizers==0.21.1
50
+ torch==2.7.1
51
+ tqdm==4.67.1
52
+ transformers==4.52.4
53
+ triton==3.3.1
54
+ typing-inspection==0.4.1
55
+ typing_extensions==4.14.0
56
+ urllib3==2.4.0
57
+ uvicorn==0.34.3