Vizznu19 committed on
Commit
cc0bbc0
·
verified ·
1 Parent(s): 9d09f85

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +72 -0
main.py CHANGED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
+ from sentence_transformers import SentenceTransformer
6
+ import faiss
7
+ import numpy as np
8
+ import re
9
+
10
app = FastAPI()

# Allow cross-origin calls from the (local) frontend.
# A wildcard origin must not be combined with allow_credentials=True: FastAPI's
# CORS documentation requires explicit origins whenever credentials are
# enabled, and reflecting every origin on credentialed requests would let any
# website call this API with the user's cookies. Nothing in this service reads
# cookies or auth headers, so credentials are disabled here.
# To support cookies later, list the exact frontend origins instead of "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
20
+
21
# Load the FAQ table (only the two columns used by the service).
# Rows missing either column are dropped before the string conversion:
# otherwise astype(str) turns NaN into the literal text "nan", which would be
# embedded and could be served to users as a question or an answer.
faq_df = pd.read_csv("BankFAQs.csv", usecols=["Question", "Answer"])
faq_df = faq_df.dropna(subset=["Question", "Answer"]).reset_index(drop=True)

# Parallel lists: questions[i] is answered by answers[i].
questions = faq_df["Question"].astype(str).tolist()
answers = faq_df["Answer"].astype(str).tolist()
25
+
26
# Sentence boundary: a run of whitespace that directly follows '.', '!' or '?'.
# Matching \s+ rather than only spaces also splits sentences that are
# separated by newlines or tabs, which multi-line answers commonly contain.
sentence_splitter = re.compile(r"(?<=[.!?])\s+")


def chunk_text(text: str) -> list:
    """Split *text* into sentence-like chunks.

    The text is cut wherever whitespace follows a sentence terminator
    ('.', '!' or '?'). Each piece is stripped of surrounding whitespace and
    empty pieces are discarded.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty, stripped strings in their original order.
        Empty or whitespace-only input yields an empty list; text without
        any terminator yields a single chunk.
    """
    pieces = map(str.strip, sentence_splitter.split(text))
    return [piece for piece in pieces if piece]
30
+
31
# Flatten every answer into its sentence chunks while remembering where each
# chunk came from. One row per chunk: (parent question, chunk text, full answer).
_chunk_rows = [
    (question, piece, answer)
    for question, answer in zip(questions, answers)
    for piece in chunk_text(answer)
]

# Three parallel lists, all indexed by chunk position.
chunked_questions = [row[0] for row in _chunk_rows]  # parent question of each chunk
chunks = [row[1] for row in _chunk_rows]  # the chunk text itself
chunked_answers = [row[2] for row in _chunk_rows]  # full answer, kept for reference
41
+
42
# Load the sentence-embedding model, embed every chunk once at import time,
# and store the vectors in a flat L2 FAISS index.
model = SentenceTransformer("all-MiniLM-L6-v2")

# FAISS works on float32 matrices, so convert the encoder output up front.
chunk_embeddings = np.asarray(model.encode(chunks), dtype="float32")

_embedding_dim = chunk_embeddings.shape[1]
chunk_index = faiss.IndexFlatL2(_embedding_dim)
chunk_index.add(chunk_embeddings)
48
+
49
class QueryRequest(BaseModel):
    # JSON body accepted by POST /search.

    # Free-text question to look up.
    query: str
    # Number of nearest chunks to retrieve from the index (defaults to 1).
    k: int = 1
52
+
53
@app.post("/search")
async def search_faq(req: QueryRequest):
    """Return the FAQ entries whose answer chunks best match ``req.query``.

    The query is embedded, the ``req.k`` nearest answer chunks are looked up
    in ``chunk_index``, and every chunk that is similar enough is mapped back
    to its parent question and full answer.

    Args:
        req: Request body carrying the query text and the number of chunks
            to retrieve.

    Returns:
        ``{"results": [...]}`` where each item has the keys ``"question"``
        and ``"full_answer"``. Items are ordered best match first and each
        FAQ entry appears at most once. The list is empty when nothing
        reaches the similarity threshold or when ``req.k`` is below 1.
    """
    # FAISS rejects k < 1, and asking for more neighbours than there are
    # indexed vectors only yields padding rows, so bound k on both sides.
    k = min(req.k, chunk_index.ntotal)
    if k < 1:
        return {"results": []}

    query_embedding = model.encode([req.query]).astype("float32")
    distances, indices = chunk_index.search(query_embedding, k)

    # IndexFlatL2 reports *squared* L2 distances. For unit-length vectors
    # ||a - b||^2 = 2 - 2*cos(a, b), hence cos = 1 - d^2 / 2.
    # NOTE(review): assumes the model emits unit-normalised embeddings —
    # confirm for the configured model.
    similarities = 1 - (distances[0] / 2)
    threshold = 0.6

    results = []
    seen = set()  # (question, answer) pairs already returned
    for idx, sim in zip(indices[0], similarities):
        # idx == -1 marks a padding slot with no real neighbour.
        if idx >= 0 and sim >= threshold:
            position = int(idx)
            entry = (chunked_questions[position], chunked_answers[position])
            # Several chunks of one answer can match; report the entry once.
            if entry not in seen:
                seen.add(entry)
                results.append({
                    "question": entry[0],
                    "full_answer": entry[1],
                })
    return {"results": results}
69
+
70
@app.get("/")
def root():
    """Return a static status message for the service root."""
    status = {"message": "FAQ Assistant is running with chunking."}
    return status