PhysicsUncomplicated committed on
Commit
8701a9d
·
verified ·
1 Parent(s): c39644a

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +17 -0
  3. app.py +48 -0
  4. combined_chunks.json +3 -0
  5. requirements.txt +5 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ combined_chunks.json filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Copy requirements on their own first so the dependency layer is reused
# when only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy rest of the app
COPY . .

# Port the API server listens on
EXPOSE 7860

# app.py only defines the FastAPI `app` object and never starts a server
# itself, so run it under an ASGI server bound to the exposed port.
# (uvicorn must be listed in requirements.txt.)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ from fastapi import FastAPI, Request
4
+ from pydantic import BaseModel
5
+ from sentence_transformers import SentenceTransformer
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+
8
+ # === Setup ===
9
app = FastAPI()

# CORS is left fully open: any origin may call the API with any method
# and any request headers.
_cors_options = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
17
+
18
+ # === Load chunks and their embeddings ===
19
# The JSON file holds a list of records, each with a "text" chunk and its
# precomputed "embedding" vector.
with open("combined_chunks.json", "r", encoding="utf-8") as f:
    data = json.loads(f.read())

# Parallel sequences: texts[i] is the chunk whose vector is embeddings[i].
texts = [item["text"] for item in data]
embeddings = np.asarray([item["embedding"] for item in data])

# === Load Model ===
# Used to embed incoming queries.
# NOTE(review): presumably the stored embeddings were produced with this same
# model -- confirm, otherwise the similarity scores are meaningless.
model = SentenceTransformer("all-MiniLM-L6-v2")
27
+
28
+ # === API Schema ===
29
class Query(BaseModel):
    """Request body for the search endpoint: one free-text query string."""

    query: str
31
+
32
+ # === Similarity Function ===
33
def cosine_similarity(query_vec, db_vecs):
    """Return the cosine similarity of one query vector against many vectors.

    Args:
        query_vec: The query vector; any array-like, flattened to 1-D.
        db_vecs: Array-like of database vectors, one per row. A single 1-D
            vector is treated as a one-row database.

    Returns:
        A 1-D numpy array with one score per database row (empty when
        ``db_vecs`` is empty).
    """
    query_vec = np.asarray(query_vec).ravel()
    db_vecs = np.atleast_2d(np.asarray(db_vecs))

    # An empty database has nothing to score; avoid a shape error in np.dot.
    if db_vecs.size == 0:
        return np.zeros(0)

    query_norm = np.linalg.norm(query_vec)
    db_norms = np.linalg.norm(db_vecs, axis=1)
    # The small epsilon keeps zero-length vectors from dividing by zero;
    # they simply score 0.
    return np.dot(db_vecs, query_vec) / (db_norms * query_norm + 1e-10)
37
+
38
@app.post("/vector-api/search")
async def search_vector(q: Query):
    """Return the five stored text chunks most similar to the query.

    Args:
        q: Request body carrying the query string.

    Returns:
        A dict ``{"results": [...]}`` with up to five chunk texts, ordered
        from most to least similar.
    """
    # Local import keeps this block self-contained.
    import asyncio

    # model.encode is a synchronous call; running it directly inside an
    # async endpoint would block the event loop for every other request,
    # so hand it to a worker thread.
    query_embedding = await asyncio.to_thread(model.encode, q.query)
    scores = cosine_similarity(query_embedding, embeddings)
    # argsort is ascending, so reverse for best-first and keep the top five.
    top_indices = np.argsort(scores)[::-1][:5]
    top_results = [texts[i] for i in top_indices]
    return {"results": top_results}
45
+
46
@app.get("/")
def root():
    """Return a fixed status message confirming the service is up."""
    status = {"message": "Lightweight Vector API is running!"}
    return status
combined_chunks.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3ee7f4a8e47c0114b419847657e2d89d7722efd3c24544f48b634f82601f7a
3
+ size 129202002
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# app.py imports fastapi and pydantic; uvicorn is the ASGI server that runs it
fastapi
uvicorn
pydantic
flask
sentence-transformers
numpy
chromadb
faiss-cpu  # Optional if using FAISS