Huseyin Kir committed on
Commit
21e9c76
·
1 Parent(s): 9057a71

first commit

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. app.py +34 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the FastAPI search service defined in app.py.
FROM python:3.9

# Create an unprivileged account (UID 1000) and run every later step as it.
RUN useradd -m -u 1000 user
USER user
# Put the user's ~/.local/bin first on PATH; presumably this is where pip
# places console scripts such as `uvicorn` when run as a non-root user.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy only the requirements file before installing, so the dependency layer
# is separate from the application source copied below.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the build context into /app, owned by the unprivileged user.
COPY --chown=user . /app
# Serve the `app` object from app.py on all interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ import lancedb
3
+ from sentence_transformers import SentenceTransformer
4
+ from huggingface_hub import snapshot_download
5
+
6
app = FastAPI()

# Step 1: fetch the index from the Hugging Face Hub. The allow pattern limits
# the download to the LanceDB folder, so the FAISS index in the same dataset
# repository is not pulled.
print("⏳ Downloading LanceDB index...")
index_path = snapshot_download(
    allow_patterns="lancedb_search_index/*",
    repo_type="dataset",
    repo_id="hkir-dev/ndl-core-rag-index",
)

# Step 2: open the table inside the downloaded snapshot and work out which
# columns to return -- everything except the raw "vector" column.
db = lancedb.connect(f"{index_path}/lancedb_search_index")
table = db.open_table("ndl_core_datasets")
all_columns = table.schema.names
columns_to_select = [name for name in all_columns if name != "vector"]

# Sentence-embedding model used to encode incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2")
23
+
24
@app.get("/search")
def search(query: str, limit: int = 5):
    """Return the rows of the index table most similar to ``query``.

    The query text is encoded with the module-level sentence-transformer
    model and used for a cosine-metric vector search on the LanceDB table.
    Only the columns listed in ``columns_to_select`` are returned.

    Args:
        query: Free-text search string.
        limit: Maximum number of rows to return; must be at least 1.
            Defaults to 5.

    Returns:
        A list of dicts, one per matching row, as produced by
        ``DataFrame.to_dict(orient='records')``.

    Raises:
        HTTPException: With status 422 when ``limit`` is less than 1.
    """
    # Imported locally so this handler does not depend on edits to the
    # file-level import block.
    from fastapi import HTTPException

    if limit < 1:
        raise HTTPException(status_code=422, detail="limit must be at least 1")

    query_vector = model.encode(query)
    results = (
        table.search(query_vector)
        .metric("cosine")  # must match the metric the index was built with
        .select(columns_to_select)  # explicit column selection
        .limit(limit)  # honour the caller's limit (was hard-coded to 5)
        .to_pandas()
    )
    return results.to_dict(orient='records')
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ lancedb
4
+ sentence-transformers
5
+ pandas
6
+ huggingface-hub
7
+ pyarrow
8
+ torch
9
+ numpy